Skip to content

Commit 35a2216

Browse files
init commit
0 parents  commit 35a2216

31 files changed

+12218
-0
lines changed

.gitignore

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
*~
2+
.DS_Store
3+
*.pyc
4+
*.egg
5+
.idea/
6+
*.iml
7+
.project
8+
.pydevproject
9+
.settings
10+
/*.egg-info/
11+
/dist/
12+
/build/
13+
cover/
14+
.coverage*
15+
/htmlcov/
16+
.cache/
17+
.tox
18+
.python-version

LICENSE

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
Copyright 2017 laughingman7743
2+
3+
Permission is hereby granted, free of charge, to any person obtaining a copy of
4+
this software and associated documentation files (the "Software"), to deal in
5+
the Software without restriction, including without limitation the rights to
6+
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
7+
the Software, and to permit persons to whom the Software is furnished to do so,
8+
subject to the following conditions:
9+
10+
The above copyright notice and this permission notice shall be included in all
11+
copies or substantial portions of the Software.
12+
13+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
15+
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
16+
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
17+
AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
18+
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

MANIFEST.in

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
include README.rst

README.rst

Lines changed: 226 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,226 @@
1+
.. image:: https://img.shields.io/pypi/pyversions/PyAthena.svg
2+
:target: https://pypi.python.org/pypi/PyAthena/
3+
4+
.. image:: https://circleci.com/gh/laughingman7743/PyAthena.svg?style=shield
5+
:target: https://circleci.com/gh/laughingman7743/PyAthena
6+
7+
.. image:: https://codecov.io/gh/laughingman7743/PyAthena/branch/master/graph/badge.svg
8+
:target: https://codecov.io/gh/laughingman7743/PyAthena
9+
10+
.. image:: https://img.shields.io/pypi/l/PyAthena.svg
11+
:target: https://github.com/laughingman7743/PyAthena/blob/master/LICENSE
12+
13+
14+
PyAthena
15+
========
16+
17+
PyAthena is a Python `DB API 2.0 (PEP 249)`_ compliant client for `Amazon Athena`_.
18+
19+
.. _`DB API 2.0 (PEP 249)`: https://www.python.org/dev/peps/pep-0249/
20+
.. _`Amazon Athena`: http://docs.aws.amazon.com/athena/latest/APIReference/Welcome.html
21+
22+
Requirements
23+
------------
24+
25+
* Python
26+
27+
- CPython 2.6, 2.7, 3.4, 3.5, 3.6
28+
29+
Installation
30+
------------
31+
32+
.. code:: bash
33+
34+
$ pip install PyAthena
35+
36+
Extra packages:
37+
38+
+---------------+--------------------------------------+----------+
39+
| Package | Install command | Version |
40+
+===============+======================================+==========+
41+
| Pandas | ``pip install PyAthena[Pandas]`` | >=0.19.0 |
42+
+---------------+--------------------------------------+----------+
43+
| SQLAlchemy | ``pip install PyAthena[SQLAlchemy]`` | >=1.0.0 |
44+
+---------------+--------------------------------------+----------+
45+
46+
Usage
47+
-----
48+
49+
Basic usage
50+
~~~~~~~~~~~
51+
52+
.. code:: python
53+
54+
from pyathena import connect
55+
56+
cursor = connect(aws_access_key_id='YOUR_ACCESS_KEY_ID',
57+
aws_secret_access_key='YOUR_SECRET_ACCESS_KEY',
58+
s3_staging_dir='s3://YOUR_S3_BUCKET/path/to/',
59+
region_name='us-west-2').cursor()
60+
cursor.execute("SELECT * FROM one_row")
61+
print(cursor.description)
62+
print(cursor.fetchall())
63+
64+
Cursor iteration
65+
~~~~~~~~~~~~~~~~
66+
67+
.. code:: python
68+
69+
from pyathena import connect
70+
71+
cursor = connect(aws_access_key_id='YOUR_ACCESS_KEY_ID',
72+
aws_secret_access_key='YOUR_SECRET_ACCESS_KEY',
73+
s3_staging_dir='s3://YOUR_S3_BUCKET/path/to/',
74+
region_name='us-west-2').cursor()
75+
cursor.execute("SELECT * FROM many_rows LIMIT 10")
76+
for row in cursor:
77+
print(row)
78+
79+
Query with parameter
80+
~~~~~~~~~~~~~~~~~~~~
81+
82+
The only supported `DB API paramstyle`_ is ``PyFormat``.
83+
``PyFormat`` only supports `named placeholders`_ with the old ``%`` operator style, and parameters must be passed as a dictionary.
84+
85+
.. code:: python
86+
87+
from pyathena import connect
88+
89+
cursor = connect(aws_access_key_id='YOUR_ACCESS_KEY_ID',
90+
aws_secret_access_key='YOUR_SECRET_ACCESS_KEY',
91+
s3_staging_dir='s3://YOUR_S3_BUCKET/path/to/',
92+
region_name='us-west-2').cursor()
93+
cursor.execute("""
94+
SELECT col_string FROM one_row_complex
95+
WHERE col_string = %(param)s
96+
""", {'param': 'a string'})
97+
print(cursor.fetchall())
98+
99+
If the ``%`` character is contained in your query, it must be escaped as ``%%``, like the following:
100+
101+
.. code:: sql
102+
103+
SELECT col_string FROM one_row_complex
104+
WHERE col_string = %(param)s OR col_string LIKE 'a%%'
105+
106+
.. _`DB API paramstyle`: https://www.python.org/dev/peps/pep-0249/#paramstyle
107+
.. _`named placeholders`: https://pyformat.info/#named_placeholders
108+
109+
SQLAlchemy
110+
~~~~~~~~~~
111+
112+
Install SQLAlchemy with ``pip install "SQLAlchemy>=1.0.0"`` (quoted so the shell does not treat ``>`` as a redirection) or ``pip install PyAthena[SQLAlchemy]``.
113+
Supported SQLAlchemy is 1.0.0 or higher.
114+
115+
.. code:: python
116+
117+
from urllib.parse import quote_plus # PY2: from urllib import quote_plus
118+
from sqlalchemy.engine import create_engine
119+
from sqlalchemy.sql.expression import select
120+
from sqlalchemy.sql.functions import func
121+
from sqlalchemy.sql.schema import Table, MetaData
122+
123+
conn_str = 'awsathena+rest://{aws_access_key_id}:{aws_secret_access_key}@athena.{region_name}.amazonaws.com:443/'\
124+
'{schema_name}?s3_staging_dir={s3_staging_dir}'
125+
engine = create_engine(conn_str.format(
126+
aws_access_key_id=quote_plus('YOUR_ACCESS_KEY_ID'),
127+
aws_secret_access_key=quote_plus('YOUR_SECRET_ACCESS_KEY'),
128+
region_name='us-west-2',
129+
schema_name='default',
130+
s3_staging_dir=quote_plus('s3://YOUR_S3_BUCKET/path/to/')))
131+
many_rows = Table('many_rows', MetaData(bind=engine), autoload=True)
132+
print(select([func.count('*')], from_obj=many_rows).scalar())
133+
134+
The connection string has the following format:
135+
136+
.. code:: python
137+
138+
awsathena+rest://{aws_access_key_id}:{aws_secret_access_key}@athena.{region_name}.amazonaws.com:443/{schema_name}?s3_staging_dir={s3_staging_dir}&...
139+
140+
NOTE: ``s3_staging_dir`` must be URL-quoted (for example with ``quote_plus``). If ``aws_access_key_id``, ``aws_secret_access_key`` or any other parameter contains special characters, it must be quoted as well.
141+
142+
Pandas
143+
~~~~~~
144+
145+
Minimal example for Pandas DataFrame:
146+
147+
.. code:: python
148+
149+
from pyathena import connect
150+
import pandas as pd
151+
152+
conn = connect(aws_access_key_id='YOUR_ACCESS_KEY_ID',
153+
aws_secret_access_key='YOUR_SECRET_ACCESS_KEY',
154+
s3_staging_dir='s3://YOUR_S3_BUCKET/path/to/',
155+
region_name='us-west-2')
156+
df = pd.read_sql("SELECT * FROM many_rows", conn)
157+
print(df.head())
158+
159+
As Pandas DataFrame:
160+
161+
.. code:: python
162+
163+
from pyathena import connect
164+
from pyathena.util import as_pandas
165+
166+
cursor = connect(aws_access_key_id='YOUR_ACCESS_KEY_ID',
167+
aws_secret_access_key='YOUR_SECRET_ACCESS_KEY',
168+
s3_staging_dir='s3://YOUR_S3_BUCKET/path/to/',
169+
region_name='us-west-2').cursor()
170+
cursor.execute("SELECT * FROM many_rows")
171+
df = as_pandas(cursor)
172+
print(df.describe())
173+
174+
Credentials
175+
-----------
176+
177+
Supports `Boto3 credentials`_.
178+
179+
.. _`Boto3 credentials`: http://boto3.readthedocs.io/en/latest/guide/configuration.html
180+
181+
Additional environment variable:
182+
183+
.. code:: bash
184+
185+
$ export AWS_ATHENA_S3_STAGING_DIR=s3://YOUR_S3_BUCKET/path/to/
186+
187+
Testing
188+
-------
189+
190+
Depends on the AWS CLI credentials and the following environment variables:
191+
192+
~/.aws/credentials
193+
194+
.. code:: cfg
195+
196+
[default]
197+
aws_access_key_id=YOUR_ACCESS_KEY_ID
198+
aws_secret_access_key=YOUR_SECRET_ACCESS_KEY
199+
200+
Environment variables
201+
202+
.. code:: bash
203+
204+
$ export AWS_DEFAULT_REGION=us-west-2
205+
$ export AWS_ATHENA_S3_STAGING_DIR=s3://YOUR_S3_BUCKET/path/to/
206+
207+
Run test
208+
~~~~~~~~
209+
210+
.. code:: bash
211+
212+
$ pip install pytest awscli
213+
$ scripts/upload_test_data.sh
214+
$ py.test
215+
$ scripts/delete_test_data.sh
216+
217+
Run test multiple Python versions
218+
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
219+
220+
.. code:: bash
221+
222+
$ pip install tox awscli
223+
$ scripts/upload_test_data.sh
224+
$ pyenv local 2.6.9 2.7.13 3.4.6 3.5.3 3.6.1
225+
$ tox
226+
$ scripts/delete_test_data.sh

circle.yml

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
machine:
2+
pre:
3+
- cd /opt/circleci/.pyenv/plugins/python-build/../.. && git pull && cd -
4+
python:
5+
version: '3.6.1'
6+
environment:
7+
PY26: '2.6.9'
8+
PY27: '2.7.13'
9+
PY34: '3.4.6'
10+
PY35: '3.5.3'
11+
PY36: '3.6.1'
12+
13+
dependencies:
14+
cache_directories:
15+
- '/opt/circleci/.pyenv'
16+
override:
17+
- pyenv install -sk ${PY26}
18+
- pyenv install -sk ${PY27}
19+
- pyenv install -sk ${PY34}
20+
- pyenv install -sk ${PY35}
21+
- pyenv install -sk ${PY36}
22+
- pyenv local ${PY27} ${PY26} ${PY34} ${PY35} ${PY36}
23+
- pip install -U pip detox codecov awscli
24+
25+
test:
26+
pre:
27+
- ./scripts/upload_test_data.sh
28+
override:
29+
- tox
30+
post:
31+
# - ./scripts/delete_test_data.sh
32+
- bash <(curl -s https://codecov.io/bash)

pyathena/__init__.py

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
# -*- coding: utf-8 -*-
2+
from __future__ import absolute_import
3+
from __future__ import unicode_literals
4+
import datetime
5+
6+
from pyathena.error import * # noqa
7+
8+
9+
__version__ = '1.0.0'
10+
11+
12+
# Globals https://www.python.org/dev/peps/pep-0249/#globals
# Module-level attributes that PEP 249 requires a DB API 2.0 driver to expose.
13+
# DB API specification level implemented by this module.
apilevel = '2.0'
14+
# Per PEP 249, level 3 means threads may share the module, connections and cursors.
threadsafety = 3
15+
# Per PEP 249, 'pyformat' means Python extended format codes such as %(name)s.
paramstyle = 'pyformat'
16+
17+
18+
class DBAPITypeObject(object):
    """Type Objects and Constructors

    https://www.python.org/dev/peps/pep-0249/#type-objects-and-constructors

    A type object groups several type-name strings and compares equal to
    each of them, so callers can write ``type_code == STRING`` regardless of
    which concrete string type a column has.

    :param values: The type names this object should compare equal to.
    """
    def __init__(self, *values):
        self.values = values

    def __eq__(self, other):
        """Return True if ``other`` is one of the grouped type names.

        Two type objects are equal when they group the same names.
        """
        if isinstance(other, DBAPITypeObject):
            return self.values == other.values
        return other in self.values

    def __ne__(self, other):
        # Python 2 does not derive ``!=`` from ``__eq__``, so define it explicitly.
        return not self.__eq__(other)

    def __hash__(self):
        # Defining ``__eq__`` disables the inherited hash on Python 3; restore
        # hashability so instances can still be used in sets and as dict keys.
        # NOTE: a type object equals several different strings, so its hash
        # cannot match the hash of each of them.
        return hash(self.values)

    def __cmp__(self, other):
        # Only consulted by Python 2 (Python 3 ignores ``__cmp__``); kept so
        # existing Python 2 ordering behaviour is unchanged.
        if other in self.values:
            return 0
        if other < self.values:
            return 1
        else:
            return -1
33+
34+
35+
# PEP 249 type objects. Each one groups the column type names passed to it;
# DBAPITypeObject.__cmp__ reports a match (0) for any name in the group.
STRING = DBAPITypeObject('CHAR', 'NCHAR',
36+
'VARCHAR', 'NVARCHAR',
37+
'LONGVARCHAR', 'LONGNVARCHAR')
38+
BINARY = DBAPITypeObject('BINARY', 'VARBINARY', 'LONGVARBINARY')
39+
NUMBER = DBAPITypeObject('BOOLEAN', 'TINYINT', 'SMALLINT', 'BIGINT', 'INTEGER',
40+
'REAL', 'DOUBLE', 'FLOAT', 'DECIMAL', 'NUMERIC')
41+
DATETIME = DBAPITypeObject('TIMESTAMP')
42+
# Only the empty type name is registered for ROWID.
# NOTE(review): presumably because no row-id column type exists - confirm.
ROWID = DBAPITypeObject('')
43+
44+
45+
# PEP 249 constructor names, aliased directly to the standard library
# datetime classes.
Date = datetime.date
46+
Time = datetime.time
47+
Timestamp = datetime.datetime
48+
49+
50+
def connect(s3_staging_dir=None, region_name=None, schema_name='default',
            poll_interval=1, encryption_option=None, kms_key=None, profile_name=None,
            converter=None, formatter=None, **kwargs):
    """Create and return a new ``pyathena.connection.Connection``.

    The named parameters are handed to ``Connection`` positionally, in the
    order they are declared here; any extra keyword arguments are passed
    through unchanged.

    :return: The newly constructed ``Connection`` instance.
    """
    # Imported at call time rather than at module import time.
    # NOTE(review): presumably to avoid a circular import - confirm.
    from pyathena.connection import Connection

    connection_args = (
        s3_staging_dir,
        region_name,
        schema_name,
        poll_interval,
        encryption_option,
        kms_key,
        profile_name,
        converter,
        formatter,
    )
    return Connection(*connection_args, **kwargs)

0 commit comments

Comments
 (0)