Skip to content

Commit 10f7640

Browse files
authored
Use Kerberos for Hive Authentication (#317)
1 parent 6fd7730 commit 10f7640

File tree

3 files changed

+74
-18
lines changed

3 files changed

+74
-18
lines changed

dumper/app/src/main/java/com/google/edwmigration/dumper/application/dumper/ConnectorArguments.java

Lines changed: 18 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -126,6 +126,7 @@ public class ConnectorArguments extends DefaultArguments {
126126
public static final String OPT_HIVE_METASTORE_DUMP_PARTITION_METADATA =
127127
"hive-metastore-dump-partition-metadata";
128128
public static final String OPT_HIVE_METASTORE_DUMP_PARTITION_METADATA_DEFAULT = "true";
129+
public static final String OPT_HIVE_KERBEROS_URL = "hive-kerberos-url";
129130

130131
public static final String OPT_REQUIRED_IF_NOT_URL = "if --url is not specified";
131132
public static final String OPT_THREAD_POOL_SIZE = "thread-pool-size";
@@ -349,6 +350,18 @@ public class ConnectorArguments extends DefaultArguments {
349350
.withOptionalArg()
350351
.withValuesConvertedBy(BooleanValueConverter.INSTANCE)
351352
.defaultsTo(Boolean.parseBoolean(OPT_HIVE_METASTORE_DUMP_PARTITION_METADATA_DEFAULT));
353+
// Command-line option registered under OPT_HIVE_KERBEROS_URL; it takes a String argument
// described to the user as "principal/host" and is read back via getHiveKerberosUrl().
// NOTE(review): declared with withOptionalArg(), so the flag can presumably be given without a
// value — confirm how that case should be handled.
private final OptionSpec<String> optionHiveKerberosUrl =
354+
parser
355+
.accepts(
356+
OPT_HIVE_KERBEROS_URL,
357+
"Kerberos URL to use to authenticate Hive Thrift API. Please note that we don't"
358+
+ " accept Kerberos `REALM` in the URL. Please ensure that the tool runs in an"
359+
+ " environment where the default `REALM` is known and used. It's recommended to"
360+
+ " generate a Kerberos ticket with the same user before running the dumper. The"
361+
+ " tool will prompt for credentials if a ticket is not provided.")
362+
.withOptionalArg()
363+
.ofType(String.class)
364+
.describedAs("principal/host");
352365

353366
// Threading / Pooling
354367
private final OptionSpec<Integer> optionThreadPoolSize =
@@ -818,6 +831,11 @@ public boolean isHiveMetastorePartitionMetadataDumpingEnabled() {
818831
return BooleanUtils.isTrue(getOptions().valueOf(optionHivePartitionMetadataCollection));
819832
}
820833

834+
/**
 * Returns the value parsed for the {@code hive-kerberos-url} option.
 *
 * @return the option value as supplied on the command line (described to users as {@code
 *     principal/host}); may be {@code null}, as signalled by {@code @CheckForNull}
 */
@CheckForNull
835+
public String getHiveKerberosUrl() {
836+
return getOptions().valueOf(optionHiveKerberosUrl);
837+
}
838+
821839
/** Returns {@code true} when the parsed options contain {@code optionSaveResponse}. */
public boolean saveResponseFile() {
822840
return getOptions().has(optionSaveResponse);
823841
}

dumper/app/src/main/java/com/google/edwmigration/dumper/application/dumper/connector/hive/AbstractHiveConnector.java

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@
3939
import javax.annotation.Nonnull;
4040
import javax.annotation.concurrent.GuardedBy;
4141
import javax.annotation.concurrent.ThreadSafe;
42+
import javax.security.sasl.SaslException;
4243
import org.apache.thrift.transport.TTransportException;
4344
import org.checkerframework.checker.nullness.qual.NonNull;
4445
import org.slf4j.Logger;
@@ -63,7 +64,8 @@
6364
order = 401,
6465
arg = ConnectorArguments.OPT_HIVE_METASTORE_DUMP_PARTITION_METADATA,
6566
description =
66-
"Dump partition metadata; you may wish to disable this for production metastores with a significant number of partitions due to Thrift client performance implications.",
67+
"Dump partition metadata; you may wish to disable this for production metastores with a"
68+
+ " significant number of partitions due to Thrift client performance implications.",
6769
defaultValue = ConnectorArguments.OPT_HIVE_METASTORE_DUMP_PARTITION_METADATA_DEFAULT)
6870
@RespectsInput(
6971
order = 500,
@@ -122,7 +124,7 @@ public ThriftClientPool(
122124
builtClients.add(client);
123125
}
124126
return client;
125-
} catch (TTransportException e) {
127+
} catch (Exception e) {
126128
throw new RuntimeException(
127129
"Unable to build Thrift client '"
128130
+ threadName
@@ -191,7 +193,8 @@ public ThriftClientHandle(
191193

192194
/** Returns a thread-unsafe Thrift client unsuitable for use in multi-threaded contexts. */
193195
@Nonnull
194-
public HiveMetastoreThriftClient newClient(@Nonnull String name) throws TTransportException {
196+
public HiveMetastoreThriftClient newClient(@Nonnull String name)
197+
throws TTransportException, SaslException {
195198
LOG.debug("Creating a new Thrift client named '{}'.", name);
196199
return new HiveMetastoreThriftClient.Builder(thriftClientBuilder).withName(name).build();
197200
}
@@ -200,7 +203,8 @@ public HiveMetastoreThriftClient newClient(@Nonnull String name) throws TTranspo
200203
@Nonnull
201204
public ThriftClientPool newMultiThreadedThriftClientPool(@Nonnull String name) {
202205
LOG.debug(
203-
"Creating a new multi-threaded pooled Thrift client named '{}' backed by a thread pool of size {}.",
206+
"Creating a new multi-threaded pooled Thrift client named '{}' backed by a thread pool of"
207+
+ " size {}.",
204208
name,
205209
threadPoolSize);
206210
return new ThriftClientPool(name, thriftClientBuilder, threadPoolSize);
@@ -255,7 +259,8 @@ public Handle open(ConnectorArguments arguments) throws Exception {
255259
arguments.getPort(
256260
Integer.parseInt(ConnectorArguments.OPT_HIVE_METASTORE_PORT_DEFAULT)))
257261
.withUnavailableClientVersionBehavior(
258-
HiveMetastoreThriftClient.Builder.UnavailableClientVersionBehavior.FALLBACK);
262+
HiveMetastoreThriftClient.Builder.UnavailableClientVersionBehavior.FALLBACK)
263+
.withKerberosUrl(arguments.getHiveKerberosUrl());
259264
return new ThriftClientHandle(thriftClientBuilder, arguments.getThreadPoolSize());
260265
}
261266
}

dumper/lib-ext-hive-metastore/src/main/java/com/google/edwmigration/dumper/ext/hive/metastore/HiveMetastoreThriftClient.java

Lines changed: 46 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -19,14 +19,19 @@
1919
import com.google.common.base.Joiner;
2020
import com.google.common.base.Preconditions;
2121
import com.google.common.collect.ImmutableMap;
22+
import java.util.HashMap;
2223
import java.util.List;
2324
import java.util.Map;
2425
import javax.annotation.Nonnegative;
2526
import javax.annotation.Nonnull;
27+
import javax.annotation.Nullable;
2628
import javax.annotation.concurrent.NotThreadSafe;
29+
import javax.security.sasl.Sasl;
30+
import javax.security.sasl.SaslException;
2731
import org.apache.thrift.TConfiguration;
2832
import org.apache.thrift.protocol.TBinaryProtocol;
2933
import org.apache.thrift.protocol.TProtocol;
34+
import org.apache.thrift.transport.TSaslClientTransport;
3035
import org.apache.thrift.transport.TSocket;
3136
import org.apache.thrift.transport.TTransport;
3237
import org.apache.thrift.transport.TTransportException;
@@ -70,6 +75,7 @@ public static enum UnavailableClientVersionBehavior {
7075
@Nonnull private String name = "unnamed-thrift-client";
7176
@Nonnull private String host = "localhost";
7277
@Nonnegative private int port;
78+
@Nullable private String kerberosUrl;
7379

7480
@Nonnull
7581
private UnavailableClientVersionBehavior unavailableClientBehavior =
@@ -89,6 +95,7 @@ public Builder(@Nonnull Builder builder) {
8995
this.port = builder.port;
9096
this.unavailableClientBehavior = builder.unavailableClientBehavior;
9197
this.debug = builder.debug;
98+
this.kerberosUrl = builder.kerberosUrl;
9299
}
93100

94101
@Nonnull
@@ -109,6 +116,12 @@ public Builder withPort(@Nonnegative int port) {
109116
return this;
110117
}
111118

119+
/**
 * Sets the Kerberos URL this builder uses when creating the Thrift transport.
 *
 * <p>When the value is {@code null}, {@code createTransport()} returns the plain socket
 * transport. Otherwise the value is split on {@code "/"} and must yield exactly two parts, or
 * {@code createTransport()} throws {@link IllegalArgumentException}; the two parts are passed
 * to the {@code TSaslClientTransport} constructor.
 *
 * @param kerberosUrl the Kerberos URL, or {@code null} to leave the transport unwrapped
 * @return this builder, for call chaining
 */
@Nonnull
120+
public Builder withKerberosUrl(@Nullable String kerberosUrl) {
121+
this.kerberosUrl = kerberosUrl;
122+
return this;
123+
}
124+
112125
@Nonnull
113126
public Builder withUnavailableClientVersionBehavior(
114127
@Nonnull UnavailableClientVersionBehavior behavior) {
@@ -123,15 +136,7 @@ public Builder withDebug(boolean value) {
123136
}
124137

125138
@Nonnull
126-
public HiveMetastoreThriftClient build() throws TTransportException {
127-
128-
// We used to support Kerberos authentication, but that was when we used the Hive metastore
129-
// client
130-
// wrapper around Thrift. Now that we are connecting via Thrift directly, we would need to
131-
// wrap
132-
// the TTransport here with a TSaslClientTransport parameterized accordingly. This hasn't been
133-
// done yet
134-
// in the interest of expediency.
139+
private TTransport createTransport() throws SaslException, TTransportException {
135140
TTransport transport =
136141
new TSocket(
137142
new TConfiguration(
@@ -140,14 +145,41 @@ public HiveMetastoreThriftClient build() throws TTransportException {
140145
TConfiguration.DEFAULT_RECURSION_DEPTH),
141146
host,
142147
port);
148+
149+
if (kerberosUrl == null) {
150+
return transport;
151+
}
152+
153+
String[] urlParts = kerberosUrl.split("/");
154+
155+
if (urlParts.length != 2) {
156+
throw new IllegalArgumentException(
157+
"Please provide an URL in the format of `principal/cluster`");
158+
}
159+
160+
Map<String, String> saslProperties = new HashMap<>();
161+
saslProperties.put(Sasl.SERVER_AUTH, "true");
162+
saslProperties.put(Sasl.QOP, "auth-conf");
163+
164+
// See:
165+
// https://docs.oracle.com/javase/8/docs/technotes/guides/security/jgss/single-signon.html
166+
return new TSaslClientTransport(
167+
"GSSAPI", null, urlParts[0], urlParts[1], saslProperties, null, transport);
168+
}
169+
170+
@Nonnull
171+
public HiveMetastoreThriftClient build() throws TTransportException, SaslException {
172+
TTransport transport = createTransport();
173+
143174
TProtocol protocol = new TBinaryProtocol(transport);
144175
transport.open();
145176

146177
final HiveMetastoreThriftClient client;
147178
if (supportedVersionMappings.containsKey(requestedVersionString)) {
148179
if (debug)
149180
LOG.debug(
150-
"The request for Hive metastore Thrift client version '{}' is satisfiable; building it now.",
181+
"The request for Hive metastore Thrift client version '{}' is satisfiable; building"
182+
+ " it now.",
151183
requestedVersionString);
152184
client = supportedVersionMappings.get(requestedVersionString).provide(name, protocol);
153185
} else {
@@ -161,9 +193,10 @@ public HiveMetastoreThriftClient build() throws TTransportException {
161193
if (unavailableClientBehavior == UnavailableClientVersionBehavior.FALLBACK) {
162194
LOG.warn(
163195
messagePrefix
164-
+ " The caller requested fallback behavior, so a client compiled against a superset Thrift specification will be used instead. "
165-
+ "If you encounter an error when using the fallback client, please contact CompilerWorks support and provide "
166-
+ "the originally requested version number.");
196+
+ " The caller requested fallback behavior, so a client compiled against a"
197+
+ " superset Thrift specification will be used instead. If you encounter an error"
198+
+ " when using the fallback client, please contact CompilerWorks support and"
199+
+ " provide the originally requested version number.");
167200
client = new HiveMetastoreThriftClient_Superset(name, protocol);
168201
} else {
169202
throw new UnsupportedOperationException(

0 commit comments

Comments
 (0)