Add docstrings to SQLAlchemy implementation classes

laughingman7743 · claude · laughingman7743 · commit 17682c818ced · 2026-01-01T23:05:41.000+09:00
Add documentation for:

- Compiler classes: AthenaTypeCompiler, AthenaStatementCompiler, AthenaDDLCompiler
- Preparer classes: AthenaDMLIdentifierPreparer, AthenaDDLIdentifierPreparer
- Type classes: AthenaTimestamp, AthenaDate, Tinyint, TINYINT, AthenaStruct, STRUCT, AthenaMap, MAP, AthenaArray, ARRAY
- Utility classes: _HashableDict
- Module docstring for util.py

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
diff --git a/pyathena/sqlalchemy/compiler.py b/pyathena/sqlalchemy/compiler.py
@@ -41,6 +41,26 @@
 
 
 class AthenaTypeCompiler(GenericTypeCompiler):
+    """Type compiler for Amazon Athena SQL types.
+
+    This compiler translates SQLAlchemy type objects into Athena-compatible
+    SQL type strings for use in DDL statements. It handles the mapping between
+    SQLAlchemy's portable types and Athena's specific type syntax.
+
+    Athena has specific requirements for type names that differ from standard
+    SQL. For example, FLOAT maps to REAL in CAST expressions, and various
+    string types (TEXT, NCHAR, NVARCHAR) all map to STRING.
+
+    The compiler also supports Athena-specific complex types:
+    - STRUCT/ROW: Nested record types with named fields
+    - MAP: Key-value pair collections
+    - ARRAY: Ordered collections of elements
+
+    See Also:
+        AWS Athena Data Types:
+        https://docs.aws.amazon.com/athena/latest/ug/data-types.html
+    """
+
     def visit_FLOAT(self, type_: Type[Any], **kw) -> str:  # noqa: N802
         return self.visit_REAL(type_, **kw)
 
@@ -180,6 +200,25 @@ def visit_ARRAY(self, type_, **kw):  # noqa: N802
 
 
 class AthenaStatementCompiler(SQLCompiler):
+    """SQL statement compiler for Amazon Athena queries.
+
+    This compiler generates Athena-compatible SQL statements from SQLAlchemy
+    expression constructs. It handles Athena-specific SQL syntax including:
+
+    - Function name mapping (e.g., char_length -> length)
+    - Lambda expressions in functions like filter()
+    - CAST expressions with Athena type requirements
+    - OFFSET/LIMIT clause ordering (Athena uses OFFSET before LIMIT)
+    - Time travel hints (FOR TIMESTAMP AS OF, FOR VERSION AS OF)
+
+    The compiler ensures that generated SQL is compatible with Presto/Trino
+    syntax used by Athena engine versions 2 and 3.
+
+    See Also:
+        AWS Athena SQL Reference:
+        https://docs.aws.amazon.com/athena/latest/ug/ddl-sql-reference.html
+    """
+
     def visit_char_length_func(self, fn: "FunctionElement[Any]", **kw):
         return f"length{self.function_argspec(fn, **kw)}"
 
@@ -259,6 +298,42 @@ def format_from_hint_text(self, sqltext, table, hint, iscrud):
 
 
 class AthenaDDLCompiler(DDLCompiler):
+    """DDL compiler for Amazon Athena CREATE TABLE and related statements.
+
+    This compiler generates Athena-compatible DDL statements including support
+    for Athena-specific table options:
+
+    - External table creation (EXTERNAL keyword for Hive-style tables)
+    - Iceberg table creation (managed tables with ACID support)
+    - File formats: PARQUET, ORC, TEXTFILE, JSON, AVRO, etc.
+    - Row formats with SerDe specifications
+    - Compression settings for various file formats
+    - Table locations in S3
+    - Partitioning (both Hive-style and Iceberg transforms)
+    - Bucketing/clustering for optimized queries
+
+    The compiler uses backtick quoting for DDL identifiers (different from
+    DML which uses double quotes) and handles Athena's reserved words.
+
+    Example:
+        Compiling a table with this compiler might produce DDL such as::
+
+            CREATE EXTERNAL TABLE IF NOT EXISTS my_schema.my_table (
+                id INT,
+                name STRING
+            )
+            PARTITIONED BY (
+                dt STRING
+            )
+            STORED AS PARQUET
+            LOCATION 's3://my-bucket/my-table/'
+            TBLPROPERTIES ('parquet.compress' = 'SNAPPY')
+
+    See Also:
+        AWS Athena CREATE TABLE:
+        https://docs.aws.amazon.com/athena/latest/ug/create-table.html
+    """
+
     @property
     def preparer(self) -> IdentifierPreparer:
         return self._preparer
diff --git a/pyathena/sqlalchemy/preparer.py b/pyathena/sqlalchemy/preparer.py
@@ -12,10 +12,42 @@
 
 
 class AthenaDMLIdentifierPreparer(IdentifierPreparer):
+    """Identifier preparer for Athena DML (SELECT, INSERT, etc.) statements.
+
+    This preparer handles quoting and escaping of identifiers in DML statements.
+    It uses double quotes for identifiers and recognizes Athena's SELECT
+    statement reserved words to determine when quoting is necessary.
+
+    Athena's DML syntax follows Presto/Trino conventions, which differ from
+    DDL syntax (which uses Hive conventions with backticks).
+
+    See Also:
+        :class:`AthenaDDLIdentifierPreparer`: Preparer for DDL statements.
+        AWS Athena Reserved Words:
+        https://docs.aws.amazon.com/athena/latest/ug/reserved-words.html
+    """
+
     reserved_words: Set[str] = SELECT_STATEMENT_RESERVED_WORDS
 
 
 class AthenaDDLIdentifierPreparer(IdentifierPreparer):
+    """Identifier preparer for Athena DDL (CREATE, ALTER, DROP) statements.
+
+    This preparer handles quoting and escaping of identifiers in DDL statements.
+    It uses backticks for identifiers (Hive convention) rather than double
+    quotes (Presto/Trino convention used in DML).
+
+    Key differences from DML preparer:
+    - Uses the backtick character as the quote character
+    - Recognizes DDL-specific reserved words
+    - Treats underscore (_) as an illegal initial character
+
+    See Also:
+        :class:`AthenaDMLIdentifierPreparer`: Preparer for DML statements.
+        AWS Athena DDL Reserved Words:
+        https://docs.aws.amazon.com/athena/latest/ug/reserved-words.html
+    """
+
     reserved_words = DDL_RESERVED_WORDS
     illegal_initial_characters = ILLEGAL_INITIAL_CHARACTERS.union("_")
 
diff --git a/pyathena/sqlalchemy/types.py b/pyathena/sqlalchemy/types.py
@@ -13,6 +13,24 @@
 
 
 class AthenaTimestamp(TypeEngine[datetime]):
+    """SQLAlchemy type for Athena TIMESTAMP values.
+
+    This type handles the conversion of Python datetime objects to Athena's
+    TIMESTAMP literal syntax. When used in queries, datetime values are
+    rendered as ``TIMESTAMP 'YYYY-MM-DD HH:MM:SS.mmm'``.
+
+    The type supports millisecond precision (3 decimal places) which matches
+    Athena's TIMESTAMP type precision.
+
+    Example:
+        >>> from sqlalchemy import Column, Table, MetaData
+        >>> from pyathena.sqlalchemy.types import AthenaTimestamp
+        >>> metadata = MetaData()
+        >>> events = Table('events', metadata,
+        ...     Column('event_time', AthenaTimestamp)
+        ... )
+    """
+
     render_literal_cast = True
     render_bind_cast = True
 
@@ -27,6 +45,21 @@ def literal_processor(self, dialect: "Dialect") -> Optional["_LiteralProcessorTy
 
 
 class AthenaDate(TypeEngine[date]):
+    """SQLAlchemy type for Athena DATE values.
+
+    This type handles the conversion of Python date objects to Athena's
+    DATE literal syntax. When used in queries, date values are rendered
+    as ``DATE 'YYYY-MM-DD'``.
+
+    Example:
+        >>> from sqlalchemy import Column, Table, MetaData
+        >>> from pyathena.sqlalchemy.types import AthenaDate
+        >>> metadata = MetaData()
+        >>> orders = Table('orders', metadata,
+        ...     Column('order_date', AthenaDate)
+        ... )
+    """
+
     render_literal_cast = True
     render_bind_cast = True
 
@@ -41,14 +74,53 @@ def literal_processor(self, dialect: "Dialect") -> Optional["_LiteralProcessorTy
 
 
 class Tinyint(sqltypes.Integer):
+    """SQLAlchemy type for Athena TINYINT (8-bit signed integer).
+
+    TINYINT stores values from -128 to 127. This type is useful for
+    columns that contain small integer values to optimize storage.
+    """
+
     __visit_name__ = "tinyint"
 
 
 class TINYINT(Tinyint):
+    """Uppercase alias for Tinyint type.
+
+    This provides a name that follows SQLAlchemy's uppercase naming convention.
+    """
+
     __visit_name__ = "TINYINT"
 
 
 class AthenaStruct(TypeEngine[Dict[str, Any]]):
+    """SQLAlchemy type for Athena STRUCT/ROW complex type.
+
+    STRUCT represents a record with named fields, similar to a database row
+    or a Python dictionary with typed values. Each field has a name and a
+    data type.
+
+    Args:
+        *fields: Field specifications. Each can be either:
+            - A string (field name, defaults to STRING type)
+            - A tuple of (field_name, field_type)
+
+    Example:
+        >>> from sqlalchemy import Column, Table, MetaData, types
+        >>> from pyathena.sqlalchemy.types import AthenaStruct
+        >>> metadata = MetaData()
+        >>> users = Table('users', metadata,
+        ...     Column('address', AthenaStruct(
+        ...         ('street', types.String),
+        ...         ('city', types.String),
+        ...         ('zip_code', types.Integer)
+        ...     ))
+        ... )
+
+    See Also:
+        AWS Athena STRUCT Type:
+        https://docs.aws.amazon.com/athena/latest/ug/rows-and-structs.html
+    """
+
     __visit_name__ = "struct"
 
     def __init__(self, *fields: Union[str, Tuple[str, Any]]) -> None:
@@ -76,10 +148,34 @@ def python_type(self) -> type:
 
 
 class STRUCT(AthenaStruct):
+    """Uppercase alias for AthenaStruct type."""
+
     __visit_name__ = "STRUCT"
 
 
 class AthenaMap(TypeEngine[Dict[str, Any]]):
+    """SQLAlchemy type for Athena MAP complex type.
+
+    MAP represents a collection of key-value pairs where all keys have the
+    same type and all values have the same type.
+
+    Args:
+        key_type: SQLAlchemy type for map keys. Defaults to String.
+        value_type: SQLAlchemy type for map values. Defaults to String.
+
+    Example:
+        >>> from sqlalchemy import Column, Table, MetaData, types
+        >>> from pyathena.sqlalchemy.types import AthenaMap
+        >>> metadata = MetaData()
+        >>> settings = Table('settings', metadata,
+        ...     Column('config', AthenaMap(types.String, types.Integer))
+        ... )
+
+    See Also:
+        AWS Athena MAP Type:
+        https://docs.aws.amazon.com/athena/latest/ug/maps.html
+    """
+
     __visit_name__ = "map"
 
     def __init__(self, key_type: Any = None, value_type: Any = None) -> None:
@@ -105,10 +201,32 @@ def python_type(self) -> type:
 
 
 class MAP(AthenaMap):
+    """Uppercase alias for AthenaMap type."""
+
     __visit_name__ = "MAP"
 
 
 class AthenaArray(TypeEngine[List[Any]]):
+    """SQLAlchemy type for Athena ARRAY complex type.
+
+    ARRAY represents an ordered collection of elements of the same type.
+
+    Args:
+        item_type: SQLAlchemy type for array elements. Defaults to String.
+
+    Example:
+        >>> from sqlalchemy import Column, Table, MetaData, types
+        >>> from pyathena.sqlalchemy.types import AthenaArray
+        >>> metadata = MetaData()
+        >>> posts = Table('posts', metadata,
+        ...     Column('tags', AthenaArray(types.String))
+        ... )
+
+    See Also:
+        AWS Athena ARRAY Type:
+        https://docs.aws.amazon.com/athena/latest/ug/arrays.html
+    """
+
     __visit_name__ = "array"
 
     def __init__(self, item_type: Any = None) -> None:
@@ -126,4 +244,6 @@ def python_type(self) -> type:
 
 
 class ARRAY(AthenaArray):
+    """Uppercase alias for AthenaArray type."""
+
     __visit_name__ = "ARRAY"
diff --git a/pyathena/sqlalchemy/util.py b/pyathena/sqlalchemy/util.py
@@ -1,6 +1,14 @@
 # -*- coding: utf-8 -*-
+"""Utility classes for PyAthena SQLAlchemy dialect."""
 
 
 class _HashableDict(dict):  # type: ignore
+    """A dictionary subclass that can be used as a dictionary key.
+
+    SQLAlchemy's reflection caching requires hashable objects. This class
+    enables dictionary values (like table properties) to be cached by
+    making them hashable through tuple conversion.
+    """
+
     def __hash__(self):  # type: ignore
         return hash(tuple(sorted(self.items())))