import java.nio.charset.StandardCharsets;
import java.sql.Connection;
import java.sql.DriverManager;
-import java.sql.SQLException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
-import java.util.concurrent.TimeUnit;
+import java.util.concurrent.Future;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.metastore.HMSHandler;
+import org.apache.hadoop.hive.metastore.HMSHandlerProxyFactory;
import org.apache.hadoop.hive.metastore.IHMSHandler;
-import org.apache.hadoop.hive.metastore.RetryingHMSHandler;
+import org.apache.hadoop.hive.metastore.IMetaStoreClient;
import org.apache.hadoop.hive.metastore.TSetIpAddressProcessor;
+import org.apache.hadoop.hive.metastore.api.GetTableRequest;
+import org.apache.hadoop.hive.metastore.api.Table;
+import org.apache.iceberg.ClientPool;
+import org.apache.iceberg.catalog.TableIdentifier;
import org.apache.iceberg.common.DynConstructors;
import org.apache.iceberg.common.DynMethods;
import org.apache.iceberg.hadoop.Util;
import org.apache.iceberg.hive.HiveClientPool;
+import org.apache.thrift.TException;
import org.apache.thrift.protocol.TBinaryProtocol;
import org.apache.thrift.server.TServer;
import org.apache.thrift.server.TThreadPoolServer;
 * HiveMetastoreExtension} instead.
 *
 * <p>Copied over from <a
- * href="https://github.com/apache/iceberg/blob/main/hive-metastore/src/test/java/org/apache/iceberg/hive/TestHiveMetastore.java">Iceberg's
+ * href="https://github.com/apache/hive/blob/branch-4.0/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/hive/TestHiveMetastore.java">Iceberg's
 * integration testing util</a>
 */
public class TestHiveMetastore {
@@ -76,10 +81,22 @@ public class TestHiveMetastore {

  private static final DynMethods.StaticMethod GET_BASE_HMS_HANDLER =
      DynMethods.builder("getProxy")
-          .impl(RetryingHMSHandler.class, Configuration.class, IHMSHandler.class, boolean.class)
-          .impl(RetryingHMSHandler.class, HiveConf.class, IHMSHandler.class, boolean.class)
+          .impl(HMSHandlerProxyFactory.class, Configuration.class, IHMSHandler.class, boolean.class)
+          .impl(HMSHandlerProxyFactory.class, HiveConf.class, IHMSHandler.class, boolean.class)
          .buildStatic();

+  // Hive3 introduces background metastore tasks (MetastoreTaskThread) for performing various
+  // cleanup duties. These threads are scheduled and executed in a static thread pool
+  // (org.apache.hadoop.hive.metastore.ThreadPool). This thread pool is shut down normally as part
+  // of the JVM shutdown hook, but since we're creating and tearing down multiple metastore
+  // instances within the same JVM, we have to call this cleanup method manually, otherwise
+  // threads from our previous test suite will be stuck in the pool with stale config, and keep on
+  // being scheduled. This can lead to issues, e.g. accidental Persistence Manager closure by
+  // ScheduledQueryExecutionsMaintTask.
  private static final DynMethods.StaticMethod METASTORE_THREADS_SHUTDOWN =
      DynMethods.builder("shutdown")
          .impl("org.apache.hadoop.hive.metastore.ThreadPool")
@@ -89,13 +106,15 @@ public class TestHiveMetastore {
  // It's tricky to clear all static fields in an HMS instance in order to switch derby root dir.
  // Therefore, we reuse the same derby root between tests and remove it after JVM exits.
  private static final File HIVE_LOCAL_DIR;
+  private static final File HIVE_EXTERNAL_WAREHOUSE_DIR;
  private static final String DERBY_PATH;

  static {
    try {
      HIVE_LOCAL_DIR =
          createTempDirectory("hive", asFileAttribute(fromString("rwxrwxrwx"))).toFile();
-      DERBY_PATH = HIVE_LOCAL_DIR + "/metastore_db";
+      DERBY_PATH = new File(HIVE_LOCAL_DIR, "metastore_db").getPath();
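+      // java.io.File joins parent and child with the platform-specific separator, which keeps
+      // the derby path valid on non-Unix hosts as well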
+      HIVE_EXTERNAL_WAREHOUSE_DIR = new File(HIVE_LOCAL_DIR, "external");
      File derbyLogFile = new File(HIVE_LOCAL_DIR, "derby.log");
      System.setProperty("derby.stream.error.file", derbyLogFile.getAbsolutePath());
      setupMetastoreDB("jdbc:derby:" + DERBY_PATH + ";create=true");
@@ -127,9 +146,16 @@ public class TestHiveMetastore {
  TestHiveMetastore(String hiveWarehousePath) {
    this.hiveWarehousePath = hiveWarehousePath;
  }
+
+  /**
+   * Starts a TestHiveMetastore with the default connection pool size (5) and the default HiveConf.
+   */
+  public void start() {
+    start(new HiveConf(new Configuration(), TestHiveMetastore.class), DEFAULT_POOL_SIZE);
+  }
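+  // Typical lifecycle in a test, as a sketch (the warehouse path below is a hypothetical value):
+  //   TestHiveMetastore metastore = new TestHiveMetastore("file:/tmp/hive-warehouse");
+  //   metastore.start();   // boots the Thrift server and the client pool
+  //   ... exercise the metastore ...
+  //   metastore.stop();    // stops the server and shuts down the executor and handler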

  /**
-   * Starts a TestHiveMetastore with the default connection pool size with the provided HiveConf.
+   * Starts a TestHiveMetastore with the default connection pool size (5) with the provided
+   * HiveConf.
   *
   * @param conf The hive configuration to use
   */
@@ -143,7 +169,6 @@ public void start(HiveConf conf) {
   * @param conf The hive configuration to use
   * @param poolSize The number of threads in the executor pool
   */
-  @SuppressWarnings("FutureReturnValueIgnored")
  public void start(HiveConf conf, int poolSize) {
    try {
      TServerSocket socket = new TServerSocket(0);
@@ -153,7 +178,14 @@ public void start(HiveConf conf, int poolSize) {
      this.hiveConf = conf;
      this.server = newThriftServer(socket, poolSize, hiveConf);
      this.executorService = Executors.newSingleThreadExecutor();
-      this.executorService.submit(() -> server.serve());
+      Future<?> ignored = this.executorService.submit(() -> server.serve());
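+      // the returned Future is deliberately left unused: serve() only returns once the server
+      // is stopped, so there is no result worth waiting for here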
+
+      // in Hive3, setting this as a system prop ensures that it will be picked up whenever a new
+      // HiveConf is created
+      System.setProperty(
+          HiveConf.ConfVars.METASTORE_URIS.varname,
+          hiveConf.getVar(HiveConf.ConfVars.METASTORE_URIS));
+
      this.clientPool = new HiveClientPool(1, hiveConf);
    } catch (Exception e) {
      throw new RuntimeException("Cannot start TestHiveMetastore", e);
@@ -169,13 +201,7 @@ public void stop() throws Exception {
      server.stop();
    }
    if (executorService != null) {
-      executorService.shutdownNow();
-      try {
-        // Give it a reasonable timeout
-        executorService.awaitTermination(10, TimeUnit.SECONDS);
-      } catch (InterruptedException e) {
-        Thread.currentThread().interrupt();
-      }
+      executorService.shutdown();
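+      // a graceful shutdown() suffices: server.stop() above unblocks the serve() task, so the
+      // single worker thread exits on its own and does not need to be interrupted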
    }
    if (baseHandler != null) {
      baseHandler.shutdown();
@@ -215,9 +241,6 @@ public void reset() throws Exception {

    Path warehouseRoot = new Path(hiveWarehousePath);
    FileSystem fs = Util.getFs(warehouseRoot, hiveConf);
-    if (!fs.exists(warehouseRoot)) {
-      return;
-    }
    for (FileStatus fileStatus : fs.listStatus(warehouseRoot)) {
      if (!fileStatus.getPath().getName().equals("derby.log")
          && !fileStatus.getPath().getName().equals("metastore_db")) {
@@ -226,6 +249,19 @@ public void reset() throws Exception {
    }
  }

+  public Table getTable(String dbName, String tableName) throws TException, InterruptedException {
+    return clientPool.run(client -> client.getTable(new GetTableRequest(dbName, tableName)));
+  }
+
+  public Table getTable(TableIdentifier identifier) throws TException, InterruptedException {
+    return getTable(identifier.namespace().toString(), identifier.name());
+  }
+
+  public <R> R run(ClientPool.Action<R, IMetaStoreClient, TException> action)
+      throws InterruptedException, TException {
+    return clientPool.run(action, false);
+  }
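+  // Example use of the helpers above, as a sketch (database and table names are hypothetical):
+  //   Table table = metastore.getTable(TableIdentifier.of("db", "tbl"));
+  //   List<String> databases = metastore.run(IMetaStoreClient::getAllDatabases);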
+
  private TServer newThriftServer(TServerSocket socket, int poolSize, HiveConf conf)
      throws Exception {
    HiveConf serverConf = new HiveConf(conf);
@@ -249,20 +285,24 @@ private TServer newThriftServer(TServerSocket socket, int poolSize, HiveConf con
  private void initConf(HiveConf conf, int port) {
    conf.set(HiveConf.ConfVars.METASTORE_URIS.varname, "thrift://localhost:" + port);
    conf.set(HiveConf.ConfVars.METASTORE_WAREHOUSE.varname, hiveWarehousePath);
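+    // the external warehouse dir is the default parent location for external (non-managed) tables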
+    conf.set(
+        HiveConf.ConfVars.HIVE_METASTORE_WAREHOUSE_EXTERNAL.varname,
+        "file:" + HIVE_EXTERNAL_WAREHOUSE_DIR.getAbsolutePath());
    conf.set(HiveConf.ConfVars.METASTORE_TRY_DIRECT_SQL.varname, "false");
    conf.set(HiveConf.ConfVars.METASTORE_DISALLOW_INCOMPATIBLE_COL_TYPE_CHANGES.varname, "false");
    conf.set("iceberg.hive.client-pool-size", "2");
-    // Setting this to avoid thrift exception during running Iceberg tests outside Iceberg.
-    conf.set(
-        HiveConf.ConfVars.HIVE_IN_TEST.varname, HiveConf.ConfVars.HIVE_IN_TEST.getDefaultValue());
+    // set to false so that TxnManager#checkLock does not throw an exception when the requested
+    // lock component uses the UNSET data operation type
+    conf.setBoolVar(HiveConf.ConfVars.HIVE_IN_TEST, false);
  }

-  private static void setupMetastoreDB(String dbURL) throws SQLException, IOException {
+  private static void setupMetastoreDB(String dbURL) throws Exception {
    Connection connection = DriverManager.getConnection(dbURL);
    ScriptRunner scriptRunner = new ScriptRunner(connection, true, true);

    ClassLoader classLoader = ClassLoader.getSystemClassLoader();
-    InputStream inputStream = classLoader.getResourceAsStream("hive-schema-3.1.0.derby.sql");
+    InputStream inputStream = classLoader.getResourceAsStream("hive-schema-4.0.0.derby.sql");
    try (Reader reader = new InputStreamReader(inputStream, StandardCharsets.UTF_8)) {
      scriptRunner.runScript(reader);
    }