Skip to content

Commit 319d298

Browse files
committed
Fix tests to pass against Presto 0.76 and fix test setup issues
- Modify test script to make more databases
- Make tests pass against Presto 0.76
- Default to current user instead of hard-coded "hadoop" user
- Squash together Hive setup commands and use local JT to make tests faster
- Convert binary type from base64 to byte strings
1 parent 1814f9d commit 319d298

14 files changed

+68
-41
lines changed

README.rst

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -48,9 +48,10 @@ Testing
4848
Run the following in an environment with Hive/Presto::
4949

5050
./scripts/make_test_tables.sh
51-
virtualenv env
51+
virtualenv --no-site-packages env
5252
source env/bin/activate
5353
pip install -r dev_requirements.txt
5454
py.test
5555

56-
WARNING: This drops/creates tables named ``one_row``, ``one_row_complex``, and ``many_rows``.
56+
WARNING: This drops/creates tables named ``one_row``, ``one_row_complex``, and ``many_rows``, plus a
57+
database called ``pyhive_test_database``.

dev_requirements.txt

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
mock>=1.0.0
2-
pytest
3-
pytest-cov
2+
# SQLAlchemy's test suite breaks with pytest 2.6
3+
pytest==2.5.2
4+
pytest-cov==1.7.0
45
requests>=1.0.0
56
sasl>=0.1.3
67
sqlalchemy>=0.9.4

pyhive/presto.py

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
from pyhive.common import DBAPITypeObject
1212
# Make all exceptions visible in this module per DB-API
1313
from pyhive.exc import *
14+
import base64
1415
import getpass
1516
import logging
1617
import requests
@@ -181,6 +182,14 @@ def _fetch_more(self):
181182
"""Fetch the next URI and update state"""
182183
self._process_response(requests.get(self._nextUri))
183184

185+
def _decode_binary(self, rows):
186+
# As of Presto 0.69, binary data is returned as the varbinary type in base64 format
187+
# This function decodes base64 data in place
188+
for i, col in enumerate(self.description):
189+
if col[1] == 'varbinary':
190+
for row in rows:
191+
row[i] = base64.b64decode(row[i])
192+
184193
def _process_response(self, response):
185194
"""Given the JSON response from Presto's REST API, update the internal state with the next
186195
URI and any data from the response
@@ -194,7 +203,11 @@ def _process_response(self, response):
194203
assert self._state == self._STATE_RUNNING, "Should be running if processing response"
195204
self._nextUri = response_json.get('nextUri')
196205
self._columns = response_json.get('columns')
197-
self._data += response_json.get('data', [])
206+
if 'data' in response_json:
207+
assert self._columns
208+
new_data = response_json['data']
209+
self._decode_binary(new_data)
210+
self._data += new_data
198211
if 'nextUri' not in response_json:
199212
self._state = self._STATE_FINISHED
200213
if 'error' in response_json:

pyhive/sqlalchemy_presto.py

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -179,16 +179,15 @@ def get_columns(self, connection, table_name, schema=None, **kw):
179179
rows = self._get_table_columns(connection, table_name, None)
180180
result = []
181181
for row in rows:
182-
name, coltype, nullable, _is_partition_key = row
183182
try:
184-
coltype = _type_map[coltype]
183+
coltype = _type_map[row.Type]
185184
except KeyError:
186-
util.warn("Did not recognize type '%s' of column '%s'" % (coltype, name))
185+
util.warn("Did not recognize type '%s' of column '%s'" % (row.Type, row.Column))
187186
coltype = types.NullType
188187
result.append({
189-
'name': name,
188+
'name': row.Column,
190189
'type': coltype,
191-
'nullable': nullable,
190+
'nullable': row.Null,
192191
'default': None,
193192
})
194193
return result

pyhive/tests/test_hive.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ class TestHive(unittest.TestCase, DBAPITestCase):
2121
__test__ = True
2222

2323
def connect(self):
24-
return hive.connect(host=_HOST, username='hadoop')
24+
return hive.connect(host=_HOST, configuration={'mapred.job.tracker': 'local'})
2525

2626
@with_cursor
2727
def test_description(self, cursor):

pyhive/tests/test_presto.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -40,8 +40,8 @@ def test_complex(self, cursor):
4040
('float', 'double', None, None, None, None, True),
4141
('double', 'double', None, None, None, None, True),
4242
('string', 'varchar', None, None, None, None, True),
43-
('timestamp', 'bigint', None, None, None, None, True),
44-
('binary', 'varchar', None, None, None, None, True),
43+
('timestamp', 'timestamp', None, None, None, None, True),
44+
('binary', 'varbinary', None, None, None, None, True),
4545
('array', 'varchar', None, None, None, None, True),
4646
('map', 'varchar', None, None, None, None, True),
4747
('struct', 'varchar', None, None, None, None, True),
@@ -57,7 +57,7 @@ def test_complex(self, cursor):
5757
0.5,
5858
0.25,
5959
'a string',
60-
0,
60+
'1970-01-01 00:00:00.000',
6161
'123',
6262
'[1,2]',
6363
'{"1":2,"3":4}', # Presto converts all keys to strings so that they're valid JSON

pyhive/tests/test_sqlalchemy_hive.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@
3333

3434
class TestSqlAlchemyHive(unittest.TestCase, SqlAlchemyTestCase):
3535
def create_engine(self):
36-
return create_engine('hive://hadoop@localhost:10000/default')
36+
return create_engine('hive://localhost:10000/default')
3737

3838
@with_engine_connection
3939
def test_reflect_select(self, engine, connection):
@@ -64,7 +64,7 @@ def test_reserved_words(self, engine, connection):
6464
self.assertNotIn('"map"', query)
6565

6666
def test_switch_database(self):
67-
engine = create_engine('hive://hadoop@localhost:10000/pyhive_test_database')
67+
engine = create_engine('hive://localhost:10000/pyhive_test_database')
6868
try:
6969
with contextlib.closing(engine.connect()) as connection:
7070
self.assertEqual(

pyhive/tests/test_sqlalchemy_presto.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ def test_reflect_select(self, engine, connection):
3333
0.5,
3434
0.25,
3535
'a string',
36-
0,
36+
'1970-01-01 00:00:00.000',
3737
'123',
3838
'[1,2]',
3939
'{"1":2,"3":4}', # Presto converts all keys to strings so that they're valid JSON

scripts/make_many_rows.sh

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,15 @@
11
#!/bin/bash
22

3-
hive -e 'DROP TABLE IF EXISTS many_rows'
3+
temp_file=/tmp/pyhive_test_data_many_rows.tsv
4+
seq 0 9999 > $temp_file
5+
46
hive -e "
7+
DROP TABLE IF EXISTS many_rows;
58
CREATE TABLE many_rows (
69
a INT
710
) PARTITIONED BY (
811
b STRING
9-
) ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t' STORED AS TEXTFILE"
10-
11-
temp_file=/tmp/pyhive_test_data_many_rows.tsv
12-
seq 0 9999 > $temp_file
13-
hive -e "LOAD DATA LOCAL INPATH '$temp_file' INTO TABLE many_rows PARTITION (b='blah')"
12+
) ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t' STORED AS TEXTFILE;
13+
LOAD DATA LOCAL INPATH '$temp_file' INTO TABLE many_rows PARTITION (b='blah');
14+
"
1415
rm -f $temp_file

scripts/make_one_row.sh

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,7 @@
11
#!/bin/bash -eux
2-
hive -e 'DROP TABLE IF EXISTS one_row'
3-
hive -e 'CREATE TABLE one_row (number_of_rows INT)'
4-
hive -e 'INSERT OVERWRITE TABLE one_row SELECT COUNT(*) + 1 FROM one_row'
2+
hive -e '
3+
set mapred.job.tracker=local;
4+
DROP TABLE IF EXISTS one_row;
5+
CREATE TABLE one_row (number_of_rows INT);
6+
INSERT OVERWRITE TABLE one_row SELECT COUNT(*) + 1 FROM one_row;
7+
'

0 commit comments

Comments (0)