Skip to content

Commit 17682c8

Browse files
Add docstrings to SQLAlchemy implementation classes
Add documentation for:
- Compiler classes: AthenaTypeCompiler, AthenaStatementCompiler, AthenaDDLCompiler
- Preparer classes: AthenaDMLIdentifierPreparer, AthenaDDLIdentifierPreparer
- Type classes: AthenaTimestamp, AthenaDate, Tinyint, TINYINT, AthenaStruct, STRUCT, AthenaMap, MAP, AthenaArray, ARRAY
- Utility classes: _HashableDict
- Module docstring for util.py

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
1 parent 6ad4222 commit 17682c8

File tree

4 files changed

+235
-0
lines changed

4 files changed

+235
-0
lines changed

pyathena/sqlalchemy/compiler.py

Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,26 @@
4141

4242

4343
class AthenaTypeCompiler(GenericTypeCompiler):
44+
"""Type compiler for Amazon Athena SQL types.
45+
46+
This compiler translates SQLAlchemy type objects into Athena-compatible
47+
SQL type strings for use in DDL statements. It handles the mapping between
48+
SQLAlchemy's portable types and Athena's specific type syntax.
49+
50+
Athena has specific requirements for type names that differ from standard
51+
SQL. For example, FLOAT maps to REAL in CAST expressions, and various
52+
string types (TEXT, NCHAR, NVARCHAR) all map to STRING.
53+
54+
The compiler also supports Athena-specific complex types:
55+
- STRUCT/ROW: Nested record types with named fields
56+
- MAP: Key-value pair collections
57+
- ARRAY: Ordered collections of elements
58+
59+
See Also:
60+
AWS Athena Data Types:
61+
https://docs.aws.amazon.com/athena/latest/ug/data-types.html
62+
"""
63+
4464
def visit_FLOAT(self, type_: Type[Any], **kw) -> str: # noqa: N802
4565
return self.visit_REAL(type_, **kw)
4666

@@ -180,6 +200,25 @@ def visit_ARRAY(self, type_, **kw): # noqa: N802
180200

181201

182202
class AthenaStatementCompiler(SQLCompiler):
203+
"""SQL statement compiler for Amazon Athena queries.
204+
205+
This compiler generates Athena-compatible SQL statements from SQLAlchemy
206+
expression constructs. It handles Athena-specific SQL syntax including:
207+
208+
- Function name mapping (e.g., char_length -> length)
209+
- Lambda expressions in functions like filter()
210+
- CAST expressions with Athena type requirements
211+
- OFFSET/LIMIT clause ordering (Athena uses OFFSET before LIMIT)
212+
- Time travel hints (FOR TIMESTAMP AS OF, FOR VERSION AS OF)
213+
214+
The compiler ensures that generated SQL is compatible with Presto/Trino
215+
syntax used by Athena engine versions 2 and 3.
216+
217+
See Also:
218+
AWS Athena SQL Reference:
219+
https://docs.aws.amazon.com/athena/latest/ug/ddl-sql-reference.html
220+
"""
221+
183222
def visit_char_length_func(self, fn: "FunctionElement[Any]", **kw):
184223
return f"length{self.function_argspec(fn, **kw)}"
185224

@@ -259,6 +298,42 @@ def format_from_hint_text(self, sqltext, table, hint, iscrud):
259298

260299

261300
class AthenaDDLCompiler(DDLCompiler):
301+
"""DDL compiler for Amazon Athena CREATE TABLE and related statements.
302+
303+
This compiler generates Athena-compatible DDL statements including support
304+
for Athena-specific table options:
305+
306+
- External table creation (EXTERNAL keyword for Hive-style tables)
307+
- Iceberg table creation (managed tables with ACID support)
308+
- File formats: PARQUET, ORC, TEXTFILE, JSON, AVRO, etc.
309+
- Row formats with SerDe specifications
310+
- Compression settings for various file formats
311+
- Table locations in S3
312+
- Partitioning (both Hive-style and Iceberg transforms)
313+
- Bucketing/clustering for optimized queries
314+
315+
The compiler uses backtick quoting for DDL identifiers (different from
316+
DML which uses double quotes) and handles Athena's reserved words.
317+
318+
Example:
319+
A table created with this compiler might generate::
320+
321+
CREATE EXTERNAL TABLE IF NOT EXISTS my_schema.my_table (
322+
id INT,
323+
name STRING
324+
)
325+
PARTITIONED BY (
326+
dt STRING
327+
)
328+
STORED AS PARQUET
329+
LOCATION 's3://my-bucket/my-table/'
330+
TBLPROPERTIES ('parquet.compress' = 'SNAPPY')
331+
332+
See Also:
333+
AWS Athena CREATE TABLE:
334+
https://docs.aws.amazon.com/athena/latest/ug/create-table.html
335+
"""
336+
262337
@property
263338
def preparer(self) -> IdentifierPreparer:
264339
return self._preparer

pyathena/sqlalchemy/preparer.py

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,10 +12,42 @@
1212

1313

1414
class AthenaDMLIdentifierPreparer(IdentifierPreparer):
15+
"""Identifier preparer for Athena DML (SELECT, INSERT, etc.) statements.
16+
17+
This preparer handles quoting and escaping of identifiers in DML statements.
18+
It uses double quotes for identifiers and recognizes Athena's SELECT
19+
statement reserved words to determine when quoting is necessary.
20+
21+
Athena's DML syntax follows Presto/Trino conventions, which differ from
22+
DDL syntax (which uses Hive conventions with backticks).
23+
24+
See Also:
25+
:class:`AthenaDDLIdentifierPreparer`: Preparer for DDL statements.
26+
AWS Athena Reserved Words:
27+
https://docs.aws.amazon.com/athena/latest/ug/reserved-words.html
28+
"""
29+
1530
reserved_words: Set[str] = SELECT_STATEMENT_RESERVED_WORDS
1631

1732

1833
class AthenaDDLIdentifierPreparer(IdentifierPreparer):
34+
"""Identifier preparer for Athena DDL (CREATE, ALTER, DROP) statements.
35+
36+
This preparer handles quoting and escaping of identifiers in DDL statements.
37+
It uses backticks for identifiers (Hive convention) rather than double
38+
quotes (Presto/Trino convention used in DML).
39+
40+
Key differences from DML preparer:
41+
- Uses backtick (`) as the quote character
42+
- Recognizes DDL-specific reserved words
43+
- Treats underscore (_) as an illegal initial character
44+
45+
See Also:
46+
:class:`AthenaDMLIdentifierPreparer`: Preparer for DML statements.
47+
AWS Athena DDL Reserved Words:
48+
https://docs.aws.amazon.com/athena/latest/ug/reserved-words.html
49+
"""
50+
1951
reserved_words = DDL_RESERVED_WORDS
2052
illegal_initial_characters = ILLEGAL_INITIAL_CHARACTERS.union("_")
2153

pyathena/sqlalchemy/types.py

Lines changed: 120 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,24 @@
1313

1414

1515
class AthenaTimestamp(TypeEngine[datetime]):
16+
"""SQLAlchemy type for Athena TIMESTAMP values.
17+
18+
This type handles the conversion of Python datetime objects to Athena's
19+
TIMESTAMP literal syntax. When used in queries, datetime values are
20+
rendered as ``TIMESTAMP 'YYYY-MM-DD HH:MM:SS.mmm'``.
21+
22+
The type supports millisecond precision (3 decimal places) which matches
23+
Athena's TIMESTAMP type precision.
24+
25+
Example:
26+
>>> from sqlalchemy import Column, Table, MetaData
27+
>>> from pyathena.sqlalchemy.types import AthenaTimestamp
28+
>>> metadata = MetaData()
29+
>>> events = Table('events', metadata,
30+
... Column('event_time', AthenaTimestamp)
31+
... )
32+
"""
33+
1634
render_literal_cast = True
1735
render_bind_cast = True
1836

@@ -27,6 +45,21 @@ def literal_processor(self, dialect: "Dialect") -> Optional["_LiteralProcessorTy
2745

2846

2947
class AthenaDate(TypeEngine[date]):
48+
"""SQLAlchemy type for Athena DATE values.
49+
50+
This type handles the conversion of Python date objects to Athena's
51+
DATE literal syntax. When used in queries, date values are rendered
52+
as ``DATE 'YYYY-MM-DD'``.
53+
54+
Example:
55+
>>> from sqlalchemy import Column, Table, MetaData
56+
>>> from pyathena.sqlalchemy.types import AthenaDate
57+
>>> metadata = MetaData()
58+
>>> orders = Table('orders', metadata,
59+
... Column('order_date', AthenaDate)
60+
... )
61+
"""
62+
3063
render_literal_cast = True
3164
render_bind_cast = True
3265

@@ -41,14 +74,53 @@ def literal_processor(self, dialect: "Dialect") -> Optional["_LiteralProcessorTy
4174

4275

4376
class Tinyint(sqltypes.Integer):
77+
"""SQLAlchemy type for Athena TINYINT (8-bit signed integer).
78+
79+
TINYINT stores values from -128 to 127. This type is useful for
80+
columns that contain small integer values to optimize storage.
81+
"""
82+
4483
__visit_name__ = "tinyint"
4584

4685

4786
class TINYINT(Tinyint):
87+
"""Uppercase alias for Tinyint type.
88+
89+
This follows SQLAlchemy's convention of uppercase type names.
90+
"""
91+
4892
__visit_name__ = "TINYINT"
4993

5094

5195
class AthenaStruct(TypeEngine[Dict[str, Any]]):
96+
"""SQLAlchemy type for Athena STRUCT/ROW complex type.
97+
98+
STRUCT represents a record with named fields, similar to a database row
99+
or a Python dictionary with typed values. Each field has a name and a
100+
data type.
101+
102+
Args:
103+
*fields: Field specifications. Each can be either:
104+
- A string (field name, defaults to STRING type)
105+
- A tuple of (field_name, field_type)
106+
107+
Example:
108+
>>> from sqlalchemy import Column, Table, MetaData, types
109+
>>> from pyathena.sqlalchemy.types import AthenaStruct
110+
>>> metadata = MetaData()
111+
>>> users = Table('users', metadata,
112+
... Column('address', AthenaStruct(
113+
... ('street', types.String),
114+
... ('city', types.String),
115+
... ('zip_code', types.Integer)
116+
... ))
117+
... )
118+
119+
See Also:
120+
AWS Athena STRUCT Type:
121+
https://docs.aws.amazon.com/athena/latest/ug/rows-and-structs.html
122+
"""
123+
52124
__visit_name__ = "struct"
53125

54126
def __init__(self, *fields: Union[str, Tuple[str, Any]]) -> None:
@@ -76,10 +148,34 @@ def python_type(self) -> type:
76148

77149

78150
class STRUCT(AthenaStruct):
151+
"""Uppercase alias for AthenaStruct type."""
152+
79153
__visit_name__ = "STRUCT"
80154

81155

82156
class AthenaMap(TypeEngine[Dict[str, Any]]):
157+
"""SQLAlchemy type for Athena MAP complex type.
158+
159+
MAP represents a collection of key-value pairs where all keys have the
160+
same type and all values have the same type.
161+
162+
Args:
163+
key_type: SQLAlchemy type for map keys. Defaults to String.
164+
value_type: SQLAlchemy type for map values. Defaults to String.
165+
166+
Example:
167+
>>> from sqlalchemy import Column, Table, MetaData, types
168+
>>> from pyathena.sqlalchemy.types import AthenaMap
169+
>>> metadata = MetaData()
170+
>>> settings = Table('settings', metadata,
171+
... Column('config', AthenaMap(types.String, types.Integer))
172+
... )
173+
174+
See Also:
175+
AWS Athena MAP Type:
176+
https://docs.aws.amazon.com/athena/latest/ug/maps.html
177+
"""
178+
83179
__visit_name__ = "map"
84180

85181
def __init__(self, key_type: Any = None, value_type: Any = None) -> None:
@@ -105,10 +201,32 @@ def python_type(self) -> type:
105201

106202

107203
class MAP(AthenaMap):
204+
"""Uppercase alias for AthenaMap type."""
205+
108206
__visit_name__ = "MAP"
109207

110208

111209
class AthenaArray(TypeEngine[List[Any]]):
210+
"""SQLAlchemy type for Athena ARRAY complex type.
211+
212+
ARRAY represents an ordered collection of elements of the same type.
213+
214+
Args:
215+
item_type: SQLAlchemy type for array elements. Defaults to String.
216+
217+
Example:
218+
>>> from sqlalchemy import Column, Table, MetaData, types
219+
>>> from pyathena.sqlalchemy.types import AthenaArray
220+
>>> metadata = MetaData()
221+
>>> posts = Table('posts', metadata,
222+
... Column('tags', AthenaArray(types.String))
223+
... )
224+
225+
See Also:
226+
AWS Athena ARRAY Type:
227+
https://docs.aws.amazon.com/athena/latest/ug/arrays.html
228+
"""
229+
112230
__visit_name__ = "array"
113231

114232
def __init__(self, item_type: Any = None) -> None:
@@ -126,4 +244,6 @@ def python_type(self) -> type:
126244

127245

128246
class ARRAY(AthenaArray):
247+
"""Uppercase alias for AthenaArray type."""
248+
129249
__visit_name__ = "ARRAY"

pyathena/sqlalchemy/util.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,14 @@
11
# -*- coding: utf-8 -*-
2+
"""Utility classes for the PyAthena SQLAlchemy dialect."""
23

34

45
class _HashableDict(dict): # type: ignore
6+
"""A dictionary subclass that can be used as a dictionary key.
7+
8+
SQLAlchemy's reflection caching requires hashable objects. This class
9+
enables dictionary values (like table properties) to be cached by
10+
making them hashable via the hash of a sorted tuple of their items.
11+
"""
12+
513
def __hash__(self): # type: ignore
614
return hash(tuple(sorted(self.items())))

0 commit comments

Comments
 (0)