Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions awswrangler/_data_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,8 @@ def pyarrow2athena( # noqa: PLR0911,PLR0912
)
if pa.types.is_map(dtype):
return f"map<{pyarrow2athena(dtype=dtype.key_type, ignore_null=ignore_null)},{pyarrow2athena(dtype=dtype.item_type, ignore_null=ignore_null)}>"
if isinstance(dtype, getattr(pa, "BaseExtensionType", pa.ExtensionType)):
return pyarrow2athena(dtype=dtype.storage_type, ignore_null=ignore_null)
if dtype == pa.null():
if ignore_null:
return ""
Expand Down
36 changes: 36 additions & 0 deletions tests/unit/test_s3_parquet.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,42 @@ def test_read_parquet_metadata_large_dtype(path):
assert columns_types.get("c1") == "string"


def test_pyarrow2athena_uuid_extension_type():
    """Check that PyArrow's UUID extension type converts to Athena ``binary``.

    The conversion is expected to go through the extension type's storage
    type rather than failing on the extension wrapper itself.
    """
    from awswrangler._data_types import pyarrow2athena

    # ``pa.uuid`` is only present in newer PyArrow releases. Skip instead of
    # erroring with AttributeError when running against an older PyArrow.
    if not hasattr(pa, "uuid"):
        pytest.skip("pa.uuid() is not available in this PyArrow version")

    assert pyarrow2athena(pa.uuid()) == "binary"


def test_pyarrow2athena_custom_extension_type():
    """Check that a user-defined extension type converts via its storage type.

    The extension type below is stored as ``int64``, so the expected Athena
    type is ``bigint``.
    """
    from awswrangler._data_types import pyarrow2athena

    class _Int64BackedExtType(pa.ExtensionType):
        """Minimal extension type whose storage type is int64."""

        def __init__(self):
            super().__init__(pa.int64(), "test.custom_ext")

        def __arrow_ext_serialize__(self):
            # The type carries no parameters, so there is nothing to persist.
            return b""

        @classmethod
        def __arrow_ext_deserialize__(cls, storage_type, serialized):
            return cls()

    athena_type = pyarrow2athena(_Int64BackedExtType())
    assert athena_type == "bigint"


def test_athena_types_from_pyarrow_schema_with_extension():
    """Check schema-level conversion when one field uses an extension type.

    The schema mixes a UUID extension column with a plain ``int64`` column;
    both must appear in the resulting name -> Athena type mapping.
    """
    from awswrangler._data_types import athena_types_from_pyarrow_schema

    # ``pa.uuid`` is only present in newer PyArrow releases. Skip instead of
    # erroring with AttributeError when running against an older PyArrow.
    if not hasattr(pa, "uuid"):
        pytest.skip("pa.uuid() is not available in this PyArrow version")

    schema = pa.schema(
        [
            pa.field("id", pa.uuid()),
            pa.field("value", pa.int64()),
        ]
    )
    result = athena_types_from_pyarrow_schema(schema)
    assert result == {"id": "binary", "value": "bigint"}


@pytest.mark.parametrize(
"partition_cols",
[
Expand Down
Loading