Skip to content

Commit 1db7279

Browse files
committed
Optimized API to get partition keys/names and URIs.
1 parent db54bfd commit 1db7279

File tree

15 files changed

+334
-91
lines changed

15 files changed

+334
-91
lines changed

metacat-common-server/src/main/java/com/facebook/presto/spi/ConnectorSplitDetailManager.java

+22
Original file line numberDiff line numberDiff line change
@@ -63,4 +63,26 @@ SavePartitionResult savePartitions(ConnectorTableHandle table, List<ConnectorPar
6363
default List<SchemaTablePartitionName> getPartitionNames(String uri, boolean prefixSearch){
6464
return Lists.newArrayList();
6565
}
66+
67+
/**
* Gets the partition names/keys based on a filter expression for the specified table.
* This default implementation ignores all of its arguments and returns a new, empty list;
* implementations that support partition key lookup are expected to override it.
* @param table table handle
* @param filterExpression JSP based filter expression string
* @param partitionNames filter the list that matches the given partition names. If null or empty, it will return all.
* @param sort not used by this default implementation
* @param pageable not used by this default implementation
* @return filtered list of partition names (always empty for this default implementation)
*/
74+
default List<String> getPartitionKeys(ConnectorTableHandle table, String filterExpression, List<String> partitionNames, Sort sort, Pageable pageable){
75+
return Lists.newArrayList();
76+
}
77+
78+
/**
* Gets the partition uris based on a filter expression for the specified table.
* This default implementation ignores all of its arguments and returns a new, empty list;
* implementations that support partition uri lookup are expected to override it.
* @param table table handle
* @param filterExpression JSP based filter expression string
* @param partitionNames filter the list that matches the given partition names. If null or empty, it will return all.
* @param sort not used by this default implementation
* @param pageable not used by this default implementation
* @return filtered list of partition uris (always empty for this default implementation)
*/
85+
default List<String> getPartitionUris(ConnectorTableHandle table, String filterExpression, List<String> partitionNames, Sort sort, Pageable pageable){
86+
return Lists.newArrayList();
87+
}
6688
}

metacat-common/src/main/java/com/netflix/metacat/common/api/PartitionV1.java

+4-28
Original file line numberDiff line numberDiff line change
@@ -318,10 +318,7 @@ List<String> getPartitionKeys(
318318
Integer offset,
319319
@ApiParam(value = "Size of the partition list", required = false)
320320
@QueryParam("limit")
321-
Integer limit,
322-
@ApiParam(value = "Whether to include user metadata information to the response", required = false)
323-
@DefaultValue("false") @QueryParam("includeUserMetadata")
324-
Boolean includeUserMetadata
321+
Integer limit
325322
);
326323

327324
@GET
@@ -363,10 +360,7 @@ List<String> getPartitionKeys(
363360
Integer offset,
364361
@ApiParam(value = "Size of the partition list", required = false)
365362
@QueryParam("limit")
366-
Integer limit,
367-
@ApiParam(value = "Whether to include user metadata information to the response", required = false)
368-
@DefaultValue("false") @QueryParam("includeUserMetadata")
369-
Boolean includeUserMetadata
363+
Integer limit
370364
);
371365

372366
@POST
@@ -403,9 +397,6 @@ List<String> getPartitionKeysForRequest(
403397
@ApiParam(value = "Size of the partition list", required = false)
404398
@QueryParam("limit")
405399
Integer limit,
406-
@ApiParam(value = "Whether to include user metadata information to the response", required = false)
407-
@DefaultValue("false") @QueryParam("includeUserMetadata")
408-
Boolean includeUserMetadata,
409400
@ApiParam(value = "Request containing the filter expression for the partitions", required = false)
410401
GetPartitionsRequestDto getPartitionsRequestDto
411402
);
@@ -447,9 +438,6 @@ List<String> getPartitionKeysForRequest(
447438
@ApiParam(value = "Size of the partition list", required = false)
448439
@QueryParam("limit")
449440
Integer limit,
450-
@ApiParam(value = "Whether to include user metadata information to the response", required = false)
451-
@DefaultValue("false") @QueryParam("includeUserMetadata")
452-
Boolean includeUserMetadata,
453441
@ApiParam(value = "Request containing the filter expression for the partitions", required = false)
454442
GetPartitionsRequestDto getPartitionsRequestDto
455443
);
@@ -490,10 +478,7 @@ List<String> getPartitionUris(
490478
Integer offset,
491479
@ApiParam(value = "Size of the partition list", required = false)
492480
@QueryParam("limit")
493-
Integer limit,
494-
@ApiParam(value = "Whether to include user metadata information to the response", required = false)
495-
@DefaultValue("false") @QueryParam("includeUserMetadata")
496-
Boolean includeUserMetadata
481+
Integer limit
497482
);
498483

499484
@GET
@@ -535,10 +520,7 @@ List<String> getPartitionUris(
535520
Integer offset,
536521
@ApiParam(value = "Size of the partition list", required = false)
537522
@QueryParam("limit")
538-
Integer limit,
539-
@ApiParam(value = "Whether to include user metadata information to the response", required = false)
540-
@DefaultValue("false") @QueryParam("includeUserMetadata")
541-
Boolean includeUserMetadata
523+
Integer limit
542524
);
543525

544526
@POST
@@ -575,9 +557,6 @@ List<String> getPartitionUrisForRequest(
575557
@ApiParam(value = "Size of the partition list", required = false)
576558
@QueryParam("limit")
577559
Integer limit,
578-
@ApiParam(value = "Whether to include user metadata information to the response", required = false)
579-
@DefaultValue("false") @QueryParam("includeUserMetadata")
580-
Boolean includeUserMetadata,
581560
@ApiParam(value = "Request containing the filter expression for the partitions", required = false)
582561
GetPartitionsRequestDto getPartitionsRequestDto
583562
);
@@ -619,9 +598,6 @@ List<String> getPartitionUrisForRequest(
619598
@ApiParam(value = "Size of the partition list", required = false)
620599
@QueryParam("limit")
621600
Integer limit,
622-
@ApiParam(value = "Whether to include user metadata information to the response", required = false)
623-
@DefaultValue("false") @QueryParam("includeUserMetadata")
624-
Boolean includeUserMetadata,
625601
@ApiParam(value = "Request containing the filter expression for the partitions", required = false)
626602
GetPartitionsRequestDto getPartitionsRequestDto
627603
);

metacat-hive-connector/src/main/java/com/netflix/metacat/hive/connector/BaseMetacatHiveMetastore.java

+6-2
Original file line numberDiff line numberDiff line change
@@ -117,7 +117,7 @@ public List<Table> getTablesByNames(String dbName, List<String> tableNames) {
117117

118118
public List<Partition> getPartitions(String dbName, String tableName, String filter) {
119119
try (HiveMetastoreClient client = clientProvider.createMetastoreClient()){
120-
return client.get_partitions_by_filter( dbName, tableName, filter, (short)0);
120+
return client.get_partitions_by_filter(dbName, tableName, filter, (short) 0);
121121
} catch (NoSuchObjectException e) {
122122
throw new TableNotFoundException(new SchemaTableName(dbName, tableName), e);
123123
}catch (Exception e) {
@@ -127,7 +127,11 @@ public List<Partition> getPartitions(String dbName, String tableName, String fil
127127

128128
public List<Partition> getPartitions(String dbName, String tableName, List<String> partitionIds) {
129129
try (HiveMetastoreClient client = clientProvider.createMetastoreClient()){
130-
return client.get_partitions_by_names( dbName, tableName, partitionIds);
130+
if( partitionIds != null && !partitionIds.isEmpty()) {
131+
return client.get_partitions_by_names(dbName, tableName, partitionIds);
132+
} else {
133+
return client.get_partitions( dbName, tableName, (short) 0);
134+
}
131135
} catch (NoSuchObjectException e) {
132136
throw new TableNotFoundException(new SchemaTableName(dbName, tableName), e);
133137
}catch (Exception e) {

metacat-hive-connector/src/main/java/com/netflix/metacat/hive/connector/HiveSplitDetailManager.java

+77-14
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,10 @@
1313

1414
package com.netflix.metacat.hive.connector;
1515

16+
import com.facebook.presto.exception.InvalidMetaException;
1617
import com.facebook.presto.exception.PartitionAlreadyExistsException;
1718
import com.facebook.presto.exception.PartitionNotFoundException;
19+
import com.facebook.presto.hadoop.shaded.com.google.common.collect.Maps;
1820
import com.facebook.presto.hive.DirectoryLister;
1921
import com.facebook.presto.hive.ForHiveClient;
2022
import com.facebook.presto.hive.HdfsEnvironment;
@@ -46,6 +48,7 @@
4648
import com.google.common.collect.Sets;
4749
import com.netflix.metacat.common.partition.util.PartitionUtil;
4850
import com.netflix.metacat.hive.connector.util.ConverterUtil;
51+
import org.apache.hadoop.hive.metastore.Warehouse;
4952
import org.apache.hadoop.hive.metastore.api.AlreadyExistsException;
5053
import org.apache.hadoop.hive.metastore.api.FieldSchema;
5154
import org.apache.hadoop.hive.metastore.api.NoSuchObjectException;
@@ -61,6 +64,8 @@
6164
import java.util.Optional;
6265
import java.util.Set;
6366
import java.util.concurrent.ExecutorService;
67+
import java.util.function.Consumer;
68+
import java.util.function.Predicate;
6469
import java.util.stream.Collectors;
6570

6671
import static com.facebook.presto.hive.HiveUtil.schemaTableName;
@@ -98,28 +103,55 @@ private List<ConnectorPartition> getPartitions(SchemaTableName schemaTableName,
98103
List<String> partitionIds,
99104
Sort sort, Pageable pageable,
100105
boolean includePartitionDetails) {
106+
List<ConnectorPartition> result = getPartitions( schemaTableName, filterExpression, partitionIds);
107+
if( pageable != null && pageable.isPageable()){
108+
int limit = pageable.getOffset() + pageable.getLimit();
109+
if( result.size() < limit){
110+
limit = result.size();
111+
}
112+
if( pageable.getOffset() > limit) {
113+
result = Lists.newArrayList();
114+
} else {
115+
result = result.subList(pageable.getOffset(), limit);
116+
}
117+
}
118+
return result;
119+
}
120+
121+
private List<ConnectorPartition> getPartitions(SchemaTableName schemaTableName, String filterExpression,
122+
List<String> partitionIds) {
101123
List<ConnectorPartition> result = Lists.newArrayList();
102124
List<String> queryPartitionIds = Lists.newArrayList();
125+
Table table = metastore.getTable( schemaTableName.getSchemaName(), schemaTableName.getTableName())
126+
.orElseThrow(() -> new TableNotFoundException(schemaTableName));
127+
Map<String,Partition> partitionMap = null;
103128
if (!Strings.isNullOrEmpty(filterExpression)) {
104-
queryPartitionIds = metastore
105-
.getPartitionNamesByParts(schemaTableName.getSchemaName(), schemaTableName.getTableName(),
106-
Lists.newArrayList(PartitionUtil.getPartitionKeyValues(filterExpression).values())).orElse(Lists.newArrayList());
107-
}
108-
if (partitionIds != null) {
109-
queryPartitionIds.addAll(partitionIds);
129+
Map<String,Partition> filteredPartitionMap = Maps.newHashMap();
130+
List<Partition> partitions = ((MetacatHiveMetastore)metastore).getPartitions( schemaTableName.getSchemaName(), schemaTableName.getTableName(), filterExpression);
131+
partitions.forEach(partition -> {
132+
String partitionName = null;
133+
try {
134+
partitionName = Warehouse.makePartName(table.getPartitionKeys(), partition.getValues());
135+
} catch (Exception e) {
136+
throw new InvalidMetaException("One or more partition names are invalid.", e);
137+
}
138+
if (partitionIds == null || partitionIds.contains(partitionName)) {
139+
filteredPartitionMap.put(partitionName, partition);
140+
}
141+
});
142+
partitionMap = filteredPartitionMap;
110143
} else {
111-
queryPartitionIds.addAll(metastore.getPartitionNames(schemaTableName.getSchemaName(),
112-
schemaTableName.getTableName()).orElse(Lists.newArrayList()));
144+
partitionMap = getPartitionsByNames(
145+
schemaTableName.getSchemaName(), schemaTableName.getTableName(),
146+
partitionIds);
113147
}
114-
Map<String,Partition> partitionMap = getPartitionsByNames(
115-
schemaTableName.getSchemaName(), schemaTableName.getTableName(),
116-
queryPartitionIds);
117148
Map<ColumnHandle, Comparable<?>> domainMap = ImmutableMap.of(new ColumnHandle(){}, "ignore");
118149
TupleDomain<ColumnHandle> tupleDomain = TupleDomain.withFixedValues(domainMap);
150+
final List<ConnectorPartition> finalResult = result;
119151
partitionMap.forEach((s, partition) -> {
120152
StorageDescriptor sd = partition.getSd();
121153
StorageInfo storageInfo = null;
122-
if( sd != null){
154+
if (sd != null) {
123155
storageInfo = new StorageInfo();
124156
storageInfo.setUri(sd.getLocation());
125157
storageInfo.setInputFormat(sd.getInputFormat());
@@ -132,9 +164,10 @@ private List<ConnectorPartition> getPartitions(SchemaTableName schemaTableName,
132164
}
133165
}
134166
AuditInfo auditInfo = new AuditInfo();
135-
auditInfo.setCreatedDate((long)partition.getCreateTime());
167+
auditInfo.setCreatedDate((long) partition.getCreateTime());
136168
auditInfo.setLastUpdatedDate((long) partition.getLastAccessTime());
137-
result.add( new ConnectorPartitionDetailImpl(s, tupleDomain, storageInfo, partition.getParameters(), auditInfo));
169+
finalResult.add(new ConnectorPartitionDetailImpl(s, tupleDomain, storageInfo, partition.getParameters(),
170+
auditInfo));
138171
});
139172
return result;
140173
}
@@ -327,4 +360,34 @@ public Integer getPartitionCount(ConnectorTableHandle connectorHandle) {
327360
SchemaTableName schemaTableName = HiveUtil.schemaTableName(connectorHandle);
328361
return metastore.getPartitionNames(schemaTableName.getSchemaName(), schemaTableName.getTableName()).orElse(Lists.newArrayList()).size();
329362
}
363+
364+
public List<String> getPartitionKeys(ConnectorTableHandle tableHandle, String filterExpression, List<String> partitionNames, Sort sort, Pageable pageable){
365+
List<String> result = null;
366+
SchemaTableName schemaTableName = HiveUtil.schemaTableName(tableHandle);
367+
if( filterExpression != null || (partitionNames != null && !partitionNames.isEmpty())){
368+
result = getPartitions(schemaTableName, filterExpression, partitionNames).stream().map(
369+
ConnectorPartition::getPartitionId).collect(Collectors.toList());
370+
} else {
371+
result = metastore.getPartitionNames(schemaTableName.getSchemaName(), schemaTableName.getTableName())
372+
.orElse(Lists.newArrayList());
373+
}
374+
if( pageable != null && pageable.isPageable()){
375+
int limit = pageable.getOffset() + pageable.getLimit();
376+
if( result.size() < limit){
377+
limit = result.size();
378+
}
379+
if( pageable.getOffset() > limit) {
380+
result = Lists.newArrayList();
381+
} else {
382+
result = result.subList(pageable.getOffset(), limit);
383+
}
384+
}
385+
return result;
386+
}
387+
388+
public List<String> getPartitionUris(ConnectorTableHandle table, String filterExpression, List<String> partitionNames, Sort sort, Pageable pageable){
389+
SchemaTableName schemaTableName = HiveUtil.schemaTableName(table);
390+
return getPartitions(schemaTableName, filterExpression, partitionNames, sort, pageable, true).stream().map(
391+
partition -> ((ConnectorPartitionDetail) partition).getStorageInfo().getUri()).collect(Collectors.toList());
392+
}
330393
}

metacat-hive-connector/src/main/java/com/netflix/metacat/hive/connector/MetacatHiveMetastore.java

+5-4
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
package com.netflix.metacat.hive.connector;
1515

1616
import com.facebook.presto.hive.metastore.HiveMetastore;
17+
import com.facebook.presto.spi.NotFoundException;
1718
import org.apache.hadoop.hive.metastore.api.AlreadyExistsException;
1819
import org.apache.hadoop.hive.metastore.api.Database;
1920
import org.apache.hadoop.hive.metastore.api.NoSuchObjectException;
@@ -68,19 +69,19 @@ public interface MetacatHiveMetastore extends HiveMetastore {
6869
* @param tableName table name
6970
* @param filter filter expression (JSP comparable expression)
7071
* @return list of partitions
71-
* @throws NoSuchObjectException if the table does not exist
72+
* @throws NotFoundException if the table does not exist
7273
*/
73-
List<Partition> getPartitions(String dbName, String tableName, String filter) throws NoSuchObjectException;
74+
List<Partition> getPartitions(String dbName, String tableName, String filter) throws NotFoundException;
7475

7576
/**
7677
* Get partitions for the list of partition names under the given database and table name.
7778
* @param dbName database name
7879
* @param tableName table name
7980
* @param partitionIds partition ids/names
8081
* @return list of partitions
81-
* @throws NoSuchObjectException if the table does not exist
82+
* @throws NotFoundException if the table does not exist
8283
*/
83-
List<Partition> getPartitions(String dbName, String tableName, List<String> partitionIds) throws NoSuchObjectException;
84+
List<Partition> getPartitions(String dbName, String tableName, List<String> partitionIds) throws NotFoundException;
8485

8586
/**
8687
* Saves partitions.

0 commit comments

Comments
 (0)