Skip to content

Commit 1f9f2b6

Browse files
Hive: test dumper with an in-memory Hive metastore (#49)
* Java 8 compatibility change
* Prepare Shadowed Hive 3.1.2 runtime
* Hive, use ExecutorManager
* Add Gradle properties
1 parent 76e22ec commit 1f9f2b6

File tree

12 files changed

+640
-58
lines changed

12 files changed

+640
-58
lines changed

buildSrc/src/main/groovy/dwh-migration-dumper.java-common-conventions.gradle

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -180,6 +180,8 @@ spotless {
180180
// Ignore generated sources.
181181
if (path.contains('/build/'))
182182
return false;
183+
if (path.contains('hive/support/HiveServerSupport.java'))
184+
return false; // Additional copyright credits
183185
return true;
184186
}
185187
target files;

dumper/app/build.gradle

Lines changed: 8 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -66,6 +66,14 @@ dependencies {
6666
testImplementation "com.github.stefanbirkner:system-rules"
6767
testImplementation "joda-time:joda-time"
6868

69+
// Test Hive 3.1.2
70+
testImplementation project(path: ':dumper:lib-ext-hive-metastore', configuration: 'shadow')
71+
// Excluded during shadowJar step, same versions as exclusions
72+
testRuntimeOnly "org.datanucleus:datanucleus-api-jdo:4.2.4"
73+
testRuntimeOnly "org.datanucleus:datanucleus-core:4.1.17"
74+
testRuntimeOnly "org.datanucleus:javax.jdo:3.2.0-m3"
75+
testRuntimeOnly "org.datanucleus:datanucleus-rdbms:4.1.19"
76+
6977
sources "org.slf4j:jcl-over-slf4j:1.7.14@sources"
7078
sources "ch.qos.logback:logback-classic:1.2.3@sources"
7179
}

dumper/app/src/main/java/com/google/edwmigration/dumper/application/dumper/connector/hive/AbstractHiveConnector.java

Lines changed: 30 additions & 23 deletions
Original file line number | Diff line number | Diff line change
@@ -19,8 +19,16 @@
1919
import com.google.common.base.Preconditions;
2020
import com.google.common.io.ByteSink;
2121
import com.google.common.util.concurrent.MoreExecutors;
22+
import com.google.edwmigration.dumper.application.dumper.ConnectorArguments;
23+
import com.google.edwmigration.dumper.application.dumper.annotations.RespectsInput;
24+
import com.google.edwmigration.dumper.application.dumper.connector.AbstractConnector;
25+
import com.google.edwmigration.dumper.application.dumper.handle.AbstractHandle;
26+
import com.google.edwmigration.dumper.application.dumper.handle.Handle;
27+
import com.google.edwmigration.dumper.application.dumper.task.AbstractTask;
28+
import com.google.edwmigration.dumper.application.dumper.task.TaskRunContext;
29+
import com.google.edwmigration.dumper.ext.hive.metastore.HiveMetastoreThriftClient;
30+
import com.google.edwmigration.dumper.plugin.ext.jdk.concurrent.ExecutorManager;
2231
import com.google.errorprone.annotations.ForOverride;
23-
import java.io.IOException;
2432
import java.io.Writer;
2533
import java.nio.charset.StandardCharsets;
2634
import java.util.ArrayList;
@@ -33,15 +41,6 @@
3341
import javax.annotation.concurrent.ThreadSafe;
3442
import org.apache.thrift.transport.TTransportException;
3543
import org.checkerframework.checker.nullness.qual.NonNull;
36-
import com.google.edwmigration.dumper.application.dumper.ConnectorArguments;
37-
import com.google.edwmigration.dumper.application.dumper.annotations.RespectsInput;
38-
import com.google.edwmigration.dumper.application.dumper.connector.AbstractConnector;
39-
import com.google.edwmigration.dumper.application.dumper.handle.AbstractHandle;
40-
import com.google.edwmigration.dumper.application.dumper.handle.Handle;
41-
import com.google.edwmigration.dumper.application.dumper.task.AbstractTask;
42-
import com.google.edwmigration.dumper.application.dumper.task.TaskRunContext;
43-
import com.google.edwmigration.dumper.ext.hive.metastore.HiveMetastoreThriftClient;
44-
import com.google.edwmigration.dumper.plugin.ext.jdk.concurrent.ExecutorManager;
4544
import org.slf4j.Logger;
4645
import org.slf4j.LoggerFactory;
4746

@@ -64,21 +63,28 @@ public abstract class AbstractHiveConnector extends AbstractConnector {
6463
@ThreadSafe
6564
public static class ThriftClientPool implements AutoCloseable {
6665

66+
public interface ThriftClientConsumer {
67+
void accept(HiveMetastoreThriftClient thriftClient) throws Exception;
68+
}
69+
6770
@Nonnull
6871
private final String name;
6972
@Nonnull
7073
private final ThreadLocal<? extends HiveMetastoreThriftClient> threadLocalThriftClient;
7174
@Nonnull
75+
private final ExecutorManager executorManager;
76+
@Nonnull
7277
private final ExecutorService executorService;
7378
@Nonnull
7479
private final Object lock = new Object();
7580
@GuardedBy("lock")
7681
@Nonnull
7782
private final List<@NonNull HiveMetastoreThriftClient> builtClients = new ArrayList<>();
7883

79-
public ThriftClientPool(@Nonnull String name, @Nonnull HiveMetastoreThriftClient.Builder thriftClientBuilder, @Nonnull ExecutorService executorService) {
84+
public ThriftClientPool(@Nonnull String name, @Nonnull HiveMetastoreThriftClient.Builder thriftClientBuilder, int threadPoolSize) {
8085
this.name = Preconditions.checkNotNull(name, "name was null.");
81-
this.executorService = Preconditions.checkNotNull(executorService, "executorService was null.");
86+
this.executorService = ExecutorManager.newExecutorServiceWithBackpressure(name, threadPoolSize);
87+
this.executorManager = new ExecutorManager(executorService);
8288
this.threadLocalThriftClient = ThreadLocal.withInitial(() -> {
8389
String threadName = Thread.currentThread().getName();
8490
LOG.debug("Creating new thread-local Thrift client '{}' owned by pooled client '{}'.", threadName, name);
@@ -94,22 +100,23 @@ public ThriftClientPool(@Nonnull String name, @Nonnull HiveMetastoreThriftClient
94100
});
95101
}
96102

97-
@Nonnull
98-
public ExecutorService getExecutorService() {
99-
return executorService;
100-
}
103+
public void execute(ThriftClientConsumer consumer) {
104+
executorManager.execute(() -> {
105+
consumer.accept(getThreadLocalThriftClient().get());
106+
return null;
107+
});
108+
}
101109

102110
@Nonnull
103-
public ThreadLocal<@NonNull ? extends HiveMetastoreThriftClient> getThreadLocalThriftClient() {
111+
private ThreadLocal<@NonNull ? extends HiveMetastoreThriftClient> getThreadLocalThriftClient() {
104112
return threadLocalThriftClient;
105113
}
106114

107115
@Override
108-
public void close() throws IOException {
109-
LOG.debug("Closing pooled Thrift client '{}'.", name);
110-
final int TIMEOUT = 30;
111-
LOG.debug("Shutting down thread pool backing pooled Thrift client '{}'; will wait up to {} seconds", name, TIMEOUT);
112-
MoreExecutors.shutdownAndAwaitTermination(executorService, TIMEOUT, TimeUnit.SECONDS);
116+
public void close() throws Exception {
117+
LOG.debug("Shutting down thread pool backing pooled Thrift client '{}'", name);
118+
executorManager.close();
119+
MoreExecutors.shutdownAndAwaitTermination(executorService, 30, TimeUnit.SECONDS);
113120
synchronized (lock) {
114121
for (HiveMetastoreThriftClient client : builtClients) {
115122
try {
@@ -147,7 +154,7 @@ public HiveMetastoreThriftClient newClient(@Nonnull String name) throws TTranspo
147154
@Nonnull
148155
public ThriftClientPool newMultiThreadedThriftClientPool(@Nonnull String name) {
149156
LOG.debug("Creating a new multi-threaded pooled Thrift client named '{}' backed by a thread pool of size {}.", name, threadPoolSize);
150-
return new ThriftClientPool(name, thriftClientBuilder, ExecutorManager.newExecutorServiceWithBackpressure(name, threadPoolSize));
157+
return new ThriftClientPool(name, thriftClientBuilder, threadPoolSize);
151158
}
152159
}
153160

dumper/app/src/main/java/com/google/edwmigration/dumper/application/dumper/connector/hive/HiveMetadataConnector.java

Lines changed: 11 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -105,25 +105,26 @@ private TablesJsonTask(@Nonnull Predicate<String> schemaPredicate, boolean isHiv
105105
@Override
106106
protected void run(@Nonnull Writer writer, @Nonnull ThriftClientHandle thriftClientHandle) throws Exception {
107107
try (ThriftClientPool clientPool = thriftClientHandle.newMultiThreadedThriftClientPool("tables-task-pooled-client")) {
108-
List<? extends String> allDatabases = clientPool.getThreadLocalThriftClient().get().getAllDatabaseNames();
109-
for (String databaseName : allDatabases) {
110-
if (isIncludedSchema(databaseName)) {
111-
List<? extends String> allTables = clientPool.getThreadLocalThriftClient().get().getAllTableNamesInDatabase(databaseName);
112-
try (ConcurrentProgressMonitor monitor = new ConcurrentRecordProgressMonitor("Writing tables in schema '" + databaseName + "' to " + getTargetPath(), allTables.size())) {
113-
for (String tableName : allTables) {
114-
dumpTable(monitor, writer, clientPool, databaseName, tableName);
108+
clientPool.execute(thriftClient -> {
109+
List<? extends String> allDatabases = thriftClient.getAllDatabaseNames();
110+
for (String databaseName : allDatabases) {
111+
if (isIncludedSchema(databaseName)) {
112+
List<? extends String> allTables = thriftClient.getAllTableNamesInDatabase(databaseName);
113+
try (ConcurrentProgressMonitor monitor = new ConcurrentRecordProgressMonitor("Writing tables in schema '" + databaseName + "' to " + getTargetPath(), allTables.size())) {
114+
for (String tableName : allTables) {
115+
dumpTable(monitor, writer, clientPool, databaseName, tableName);
116+
}
115117
}
116118
}
117119
}
118-
}
120+
});
119121
}
120122
}
121123

122124
private void dumpTable(@Nonnull ConcurrentProgressMonitor monitor, @Nonnull Writer writer, @Nonnull ThriftClientPool clientPool, @Nonnull String databaseName, @Nonnull String tableName) {
123-
clientPool.getExecutorService().execute(() -> {
125+
clientPool.execute((thriftClient) -> {
124126
try {
125127
monitor.count();
126-
HiveMetastoreThriftClient thriftClient = clientPool.getThreadLocalThriftClient().get();
127128
Table table = thriftClient.getTable(databaseName, tableName);
128129
TableMetadata outTable = new TableMetadata();
129130
outTable.schemaName = table.getDatabaseName();

dumper/app/src/main/java/com/google/edwmigration/dumper/application/dumper/connector/redshift/AbstractRedshiftConnector.java

Lines changed: 10 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -18,12 +18,6 @@
1818

1919
import com.google.common.base.Joiner;
2020
import com.google.common.collect.Iterables;
21-
import java.sql.Driver;
22-
import java.time.ZoneOffset;
23-
import java.time.format.DateTimeFormatter;
24-
import java.util.List;
25-
import javax.annotation.Nonnull;
26-
import javax.sql.DataSource;
2721
import com.google.edwmigration.dumper.application.dumper.ConnectorArguments;
2822
import com.google.edwmigration.dumper.application.dumper.MetadataDumperUsageException;
2923
import com.google.edwmigration.dumper.application.dumper.annotations.RespectsArgumentDatabaseForConnection;
@@ -35,6 +29,13 @@
3529
import com.google.edwmigration.dumper.application.dumper.connector.AbstractJdbcConnector;
3630
import com.google.edwmigration.dumper.application.dumper.handle.Handle;
3731
import com.google.edwmigration.dumper.application.dumper.handle.JdbcHandle;
32+
import java.io.UnsupportedEncodingException;
33+
import java.sql.Driver;
34+
import java.time.ZoneOffset;
35+
import java.time.format.DateTimeFormatter;
36+
import java.util.List;
37+
import javax.annotation.Nonnull;
38+
import javax.sql.DataSource;
3839
import org.slf4j.Logger;
3940
import org.slf4j.LoggerFactory;
4041
import org.springframework.jdbc.datasource.SimpleDriverDataSource;
@@ -123,7 +124,7 @@ private static String requireNonNull(String val, String msg) throws MetadataDump
123124
}
124125

125126
@Nonnull
126-
private String makeJdbcUrlPostgresql(ConnectorArguments arguments) throws MetadataDumperUsageException {
127+
private String makeJdbcUrlPostgresql(ConnectorArguments arguments) throws MetadataDumperUsageException, UnsupportedEncodingException {
127128
return "jdbc:postgresql://"
128129
+ requireNonNull(arguments.getHost(), "--host should be specified")
129130
+ ":"
@@ -138,7 +139,7 @@ private String makeJdbcUrlPostgresql(ConnectorArguments arguments) throws Metada
138139
}
139140

140141
@Nonnull
141-
private String makeJdbcUrlRedshiftSimple(ConnectorArguments arguments) throws MetadataDumperUsageException {
142+
private String makeJdbcUrlRedshiftSimple(ConnectorArguments arguments) throws MetadataDumperUsageException, UnsupportedEncodingException {
142143
return "jdbc:redshift://"
143144
+ requireNonNull(arguments.getHost(), "--host should be specified")
144145
+ ":"
@@ -154,7 +155,7 @@ private String makeJdbcUrlRedshiftSimple(ConnectorArguments arguments) throws Me
154155
//TODO: [cluster-id]:[region] syntax.
155156
// either profile, or key+ secret
156157
@Nonnull
157-
private String makeJdbcUrlRedshiftIAM(ConnectorArguments arguments) throws MetadataDumperUsageException {
158+
private String makeJdbcUrlRedshiftIAM(ConnectorArguments arguments) throws MetadataDumperUsageException, UnsupportedEncodingException {
158159
String url = "jdbc:redshift:iam://"
159160
+ requireNonNull(arguments.getHost(), "--host should be specified")
160161
+ ":"

dumper/app/src/main/java/com/google/edwmigration/dumper/application/dumper/connector/redshift/JdbcPropBuilder.java

Lines changed: 8 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -16,16 +16,14 @@
1616
*/
1717
package com.google.edwmigration.dumper.application.dumper.connector.redshift;
1818

19-
import static java.nio.charset.StandardCharsets.UTF_8;
20-
2119
import com.google.common.base.Joiner;
20+
import com.google.edwmigration.dumper.application.dumper.MetadataDumperUsageException;
21+
import java.io.UnsupportedEncodingException;
2222
import java.net.URLEncoder;
23-
import java.nio.charset.StandardCharsets;
2423
import java.util.ArrayList;
2524
import java.util.List;
2625
import javax.annotation.CheckForNull;
2726
import javax.annotation.Nonnull;
28-
import com.google.edwmigration.dumper.application.dumper.MetadataDumperUsageException;
2927
import org.slf4j.Logger;
3028
import org.slf4j.LoggerFactory;
3129

@@ -46,7 +44,7 @@ public JdbcPropBuilder(String seps) {
4644
}
4745

4846
@Nonnull
49-
public JdbcPropBuilder propOrWarn(@Nonnull String prop, @CheckForNull String val, @Nonnull String msg) {
47+
public JdbcPropBuilder propOrWarn(@Nonnull String prop, @CheckForNull String val, @Nonnull String msg) throws UnsupportedEncodingException {
5048
if (val == null) {
5149
LOG.warn(msg);
5250
} else {
@@ -56,7 +54,7 @@ public JdbcPropBuilder propOrWarn(@Nonnull String prop, @CheckForNull String val
5654
}
5755

5856
@Nonnull
59-
public JdbcPropBuilder propOrError(@Nonnull String prop, @CheckForNull String val, @Nonnull String msg) throws MetadataDumperUsageException {
57+
public JdbcPropBuilder propOrError(@Nonnull String prop, @CheckForNull String val, @Nonnull String msg) throws MetadataDumperUsageException, UnsupportedEncodingException {
6058
if (val == null) {
6159
LOG.error(msg);
6260
throw new MetadataDumperUsageException(msg);
@@ -67,7 +65,7 @@ public JdbcPropBuilder propOrError(@Nonnull String prop, @CheckForNull String va
6765
}
6866

6967
@Nonnull
70-
public JdbcPropBuilder prop(@Nonnull String prop, @Nonnull String val) {
68+
public JdbcPropBuilder prop(@Nonnull String prop, @Nonnull String val) throws UnsupportedEncodingException {
7169
if (val == null) {
7270
throw new InternalError("Not checked for null: " + val);
7371
} else {
@@ -76,8 +74,9 @@ public JdbcPropBuilder prop(@Nonnull String prop, @Nonnull String val) {
7674
return this;
7775
}
7876

79-
private void addProp(String prop, String val) {
80-
props.add(prop + punctuations.charAt(1) + URLEncoder.encode(val, UTF_8));
77+
private void addProp(String prop, String val) throws UnsupportedEncodingException {
78+
// The encode(String, Charset) overload is JDK 10+
79+
props.add(prop + punctuations.charAt(1) + URLEncoder.encode(val, "UTF-8"));
8180
}
8281

8382
@Nonnull

0 commit comments

Comments
 (0)