pyathena-dev
diff --git a/‎docs/api/s3fs.rst‎
Lines changed: 13 additions & 1 deletion b/‎docs/api/s3fs.rst‎
Lines changed: 13 additions & 1 deletion
diff --git a/‎docs/s3fs.rst‎
Lines changed: 80 additions & 3 deletions b/‎docs/s3fs.rst‎
Lines changed: 80 additions & 3 deletions
diff --git a/‎pyathena/s3fs/async_cursor.py‎
Lines changed: 11 additions & 3 deletions b/‎pyathena/s3fs/async_cursor.py‎
Lines changed: 11 additions & 3 deletions
diff --git a/‎pyathena/s3fs/cursor.py‎
Lines changed: 13 additions & 1 deletion b/‎pyathena/s3fs/cursor.py‎
Lines changed: 13 additions & 1 deletion
@@ -3,7 +3,7 @@
 S3FS Integration
 ================
 
-This section covers lightweight S3FS-based cursors and data converters that use Python's built-in ``csv`` module.
+This section covers lightweight S3FS-based cursors, CSV readers, and data converters.
 
 S3FS Cursors
 ------------
@@ -16,6 +16,18 @@ S3FS Cursors
    :members:
    :inherited-members:
 
+S3FS CSV Readers
+----------------
+
+S3FSCursor supports pluggable CSV reader implementations to control how NULL values
+and empty strings are handled when parsing Athena's CSV output.
+
+.. autoclass:: pyathena.s3fs.reader.AthenaCSVReader
+   :members:
+
+.. autoclass:: pyathena.s3fs.reader.DefaultCSVReader
+   :members:
+
 S3FS Data Converters
 --------------------
 
 
@@ -9,13 +9,13 @@ S3FSCursor
 ----------
 
 S3FSCursor is a lightweight cursor that directly handles the CSV file of the query execution result output to S3.
-Unlike ArrowCursor or PandasCursor, this cursor uses Python's built-in ``csv`` module to parse results,
-making it ideal for environments where installing pandas or pyarrow is not desirable.
+Unlike ArrowCursor or PandasCursor, this cursor does not depend on pandas or pyarrow,
+making it ideal for environments where installing these libraries is not desirable.
 
 **Key features:**
 
 - No pandas or pyarrow dependencies required
-- Uses Python's built-in ``csv`` module for parsing
+- Lightweight CSV parsing (custom parser or Python's built-in ``csv`` module)
 - Lower memory footprint for simple query results
 - Full DB API 2.0 compatibility
 
@@ -172,6 +172,83 @@ Then specify an instance of this class in the converter argument when creating a
     cursor = connect(s3_staging_dir="s3://YOUR_S3_BUCKET/path/to/",
                      region_name="us-west-2").cursor(S3FSCursor, converter=CustomS3FSTypeConverter())
 
+CSV Reader Options
+~~~~~~~~~~~~~~~~~~
+
+S3FSCursor supports pluggable CSV reader implementations to control how NULL values and empty strings
+are handled. Two readers are provided:
+
+- ``AthenaCSVReader`` (default): Custom parser that distinguishes between NULL and empty string
+- ``DefaultCSVReader``: Uses Python's built-in ``csv`` module (parses both NULL and empty string as empty string, so both are returned as ``None``)
+
+**Default behavior (AthenaCSVReader):**
+
+By default, ``AthenaCSVReader`` is used, which correctly distinguishes between NULL
+values and empty strings in query results.
+
+.. code:: python
+
+    from pyathena import connect
+    from pyathena.s3fs.cursor import S3FSCursor
+
+    cursor = connect(s3_staging_dir="s3://YOUR_S3_BUCKET/path/to/",
+                     region_name="us-west-2",
+                     cursor_class=S3FSCursor).cursor()
+
+    cursor.execute("SELECT NULL AS null_col, '' AS empty_col")
+    row = cursor.fetchone()
+    print(row)  # (None, '')  - NULL is None, empty string is ''
+
+**Switching to Python's built-in csv module (DefaultCSVReader):**
+
+If you prefer to use Python's built-in ``csv`` module, you can switch to ``DefaultCSVReader``.
+Note that this reader cannot distinguish between NULL and empty string - both become empty strings
+in the parsed result, which are then converted to ``None`` by the type converter.
+
+.. code:: python
+
+    from pyathena import connect
+    from pyathena.s3fs.cursor import S3FSCursor
+    from pyathena.s3fs.reader import DefaultCSVReader
+
+    cursor = connect(s3_staging_dir="s3://YOUR_S3_BUCKET/path/to/",
+                     region_name="us-west-2",
+                     cursor_class=S3FSCursor,
+                     cursor_kwargs={"csv_reader": DefaultCSVReader}).cursor()
+
+    cursor.execute("SELECT NULL AS null_col, '' AS empty_col")
+    row = cursor.fetchone()
+    print(row)  # (None, None)  - Both NULL and empty string become None
+
+**Comparison of CSV readers:**
+
+.. list-table:: CSV Reader Behavior
+   :header-rows: 1
+   :widths: 30 20 25 25
+
+   * - Reader
+     - Implementation
+     - NULL value
+     - Empty string
+   * - AthenaCSVReader (default)
+     - Custom parser
+     - None
+     - '' (empty string)
+   * - DefaultCSVReader
+     - Python csv module
+     - None
+     - None
+
+**Why the difference?**
+
+Athena's CSV output format distinguishes between NULL values and empty strings:
+
+- NULL: unquoted empty field (e.g., ``a,,b`` → the middle field is NULL)
+- Empty string: quoted empty field (e.g., ``a,"",b`` → the middle field is an empty string)
+
+Python's standard ``csv`` module parses both cases as empty strings, losing this distinction.
+The ``AthenaCSVReader`` implements a custom parser that preserves the difference.
+
 Limitations
 ~~~~~~~~~~~
 
 
@@ -11,7 +11,7 @@
 from pyathena.error import ProgrammingError
 from pyathena.model import AthenaQueryExecution
 from pyathena.s3fs.converter import DefaultS3FSTypeConverter
-from pyathena.s3fs.result_set import AthenaS3FSResultSet
+from pyathena.s3fs.result_set import AthenaS3FSResultSet, CSVReaderType
 
 _logger = logging.getLogger(__name__)
 
@@ -20,12 +20,12 @@ class AsyncS3FSCursor(AsyncCursor):
     """Asynchronous cursor that reads CSV results via S3FileSystem.
 
     This cursor extends AsyncCursor to provide asynchronous query execution
-    with results read via Python's standard csv module and PyAthena's S3FileSystem.
+    with results read via PyAthena's S3FileSystem.
     It's a lightweight alternative when pandas/pyarrow are not needed.
 
     Features:
         - Asynchronous query execution with concurrent futures
-        - Uses Python's standard csv module for parsing
+        - Lightweight CSV parsing via pluggable readers
         - Uses PyAthena's S3FileSystem for S3 access
         - No external dependencies beyond boto3
         - Memory-efficient streaming for large datasets
@@ -61,6 +61,7 @@ def __init__(
         arraysize: int = CursorIterator.DEFAULT_FETCH_SIZE,
         result_reuse_enable: bool = False,
         result_reuse_minutes: int = CursorIterator.DEFAULT_RESULT_REUSE_MINUTES,
+        csv_reader: Optional[CSVReaderType] = None,
         **kwargs,
     ) -> None:
         """Initialize an AsyncS3FSCursor.
@@ -78,6 +79,11 @@ def __init__(
             arraysize: Number of rows to fetch per batch.
             result_reuse_enable: Enable Athena query result reuse.
             result_reuse_minutes: Minutes to reuse cached results.
+            csv_reader: CSV reader class to use for parsing results.
+                Use AthenaCSVReader (default) to distinguish between NULL
+                (unquoted empty) and empty string (quoted empty "").
+                Use DefaultCSVReader for backward compatibility where empty
+                strings are treated as NULL.
             **kwargs: Additional connection parameters.
 
         Example:
@@ -99,6 +105,7 @@ def __init__(
             result_reuse_minutes=result_reuse_minutes,
             **kwargs,
         )
+        self._csv_reader = csv_reader
 
     @staticmethod
     def get_default_converter(
@@ -156,6 +163,7 @@ def _collect_result_set(
             query_execution=query_execution,
             arraysize=self._arraysize,
             retry_config=self._retry_config,
+            csv_reader=self._csv_reader,
             **kwargs,
         )
 
 
@@ -9,7 +9,7 @@
 from pyathena.model import AthenaQueryExecution
 from pyathena.result_set import WithResultSet
 from pyathena.s3fs.converter import DefaultS3FSTypeConverter
-from pyathena.s3fs.result_set import AthenaS3FSResultSet
+from pyathena.s3fs.result_set import AthenaS3FSResultSet, CSVReaderType
 
 _logger = logging.getLogger(__name__)
 
@@ -59,6 +59,7 @@ def __init__(
         result_reuse_enable: bool = False,
         result_reuse_minutes: int = CursorIterator.DEFAULT_RESULT_REUSE_MINUTES,
         on_start_query_execution: Optional[Callable[[str], None]] = None,
+        csv_reader: Optional[CSVReaderType] = None,
         **kwargs,
     ) -> None:
         """Initialize an S3FSCursor.
@@ -75,11 +76,20 @@ def __init__(
             result_reuse_enable: Enable Athena query result reuse.
             result_reuse_minutes: Minutes to reuse cached results.
             on_start_query_execution: Callback invoked when query starts.
+            csv_reader: CSV reader class to use for parsing results.
+                Use AthenaCSVReader (default) to distinguish between NULL
+                (unquoted empty) and empty string (quoted empty "").
+                Use DefaultCSVReader for backward compatibility where empty
+                strings are treated as NULL.
             **kwargs: Additional connection parameters.
 
         Example:
             >>> cursor = connection.cursor(S3FSCursor)
             >>> cursor.execute("SELECT * FROM my_table")
+            >>>
+            >>> # Use DefaultCSVReader for backward compatibility
+            >>> from pyathena.s3fs.reader import DefaultCSVReader
+            >>> cursor = connection.cursor(S3FSCursor, csv_reader=DefaultCSVReader)
         """
         super().__init__(
             s3_staging_dir=s3_staging_dir,
@@ -95,6 +105,7 @@ def __init__(
             **kwargs,
         )
         self._on_start_query_execution = on_start_query_execution
+        self._csv_reader = csv_reader
         self._query_id: Optional[str] = None
         self._result_set: Optional[AthenaS3FSResultSet] = None
 
@@ -232,6 +243,7 @@ def execute(
                 query_execution=query_execution,
                 arraysize=self.arraysize,
                 retry_config=self._retry_config,
+                csv_reader=self._csv_reader,
                 **kwargs,
             )
         else: