Skip to content

Commit 3f611e6

Browse files
committed
create athena table kwargs
1 parent 562ff47 commit 3f611e6

File tree

4 files changed

+56
-4
lines changed

4 files changed

+56
-4
lines changed

.gitignore

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1,2 @@
1-
**/__pycache__
1+
**/__pycache__
2+
uv.lock

affinity.py

Lines changed: 28 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
__doc__ = """
22
Module for creating well-documented datasets, with types and annotations.
3-
Version 0.8.1
43
"""
54

65
from dataclasses import dataclass, field
@@ -235,6 +234,34 @@ def from_sql(cls, query: str, **kwargs):
235234
instance.origin["source"] += f"\nquery:\n{query}"
236235
return instance
237236

237+
@property
def athena_types(self):
    """Map the dataframe's pandas dtypes to AWS Athena SQL types.

    Delegates to ``awswrangler.catalog.extract_athena_types`` using
    ``self.df`` and the partition columns from ``self.LOCATION.partition_by``.

    Returns:
        A ``(columns_types, partition_types)`` pair as produced by awswrangler.
    """
    # awswrangler is resolved through try_import at call time rather than
    # through a module-level import.
    awswrangler = try_import("awswrangler")
    extract = awswrangler.catalog.extract_athena_types
    regular_columns, partition_columns = extract(
        df=self.df,
        partition_cols=self.LOCATION.partition_by,
    )
    return regular_columns, partition_columns
247+
248+
def kwargs_for_create_athena_table(
249+
self, db: str, table: str, compression: str | None = None, **kwargs
250+
):
251+
"""Arguments for creating AWS Athena tables."""
252+
columns_types, partitions_types = self.athena_types
253+
return dict(
254+
database=db,
255+
table=table,
256+
path=self.LOCATION.folder,
257+
columns_types=columns_types,
258+
partitions_types=partitions_types,
259+
compression=compression,
260+
description=self.__doc__,
261+
columns_comments=self.data_dict,
262+
**kwargs,
263+
)
264+
238265

239266
class Dataset(DatasetBase):
240267
"""Base class for typed, annotated datasets."""

pyproject.toml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
44

55
[project]
66
name = "affinity"
7-
version = "0.8.7"
7+
version = "0.9.0"
88
description = "Module for creating well-documented datasets, with types and annotations."
99
authors = [
1010
{ name = "Alex Kislukhin" }
@@ -13,6 +13,7 @@ readme = "README.md"
1313
requires-python = ">=3.11"
1414

1515
dependencies = [
16+
"awswrangler>=3.10.1",
1617
"duckdb>=1",
1718
"pandas",
1819
]

test_affinity.py

Lines changed: 24 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -101,7 +101,7 @@ class aDataset(af.Dataset):
101101
def test_wrong_dataset_declaration():
102102
class aDataset(af.Dataset):
103103
v: af.Vector(np.int8) # type: ignore
104-
# v = af.Vector(np.int8) # the correct way
104+
# v = af.Vector(np.int8) # this is the correct way
105105

106106
with pytest.raises(ValueError):
107107
aDataset()
@@ -329,6 +329,29 @@ class cDataset(af.Dataset):
329329
cDataset().sql("SELECT v2 FROM df") # "df" != last test's data_a.df
330330

331331

332+
def test_kwargs_for_create_athena_table():
    """Check the Athena table kwargs built for a small partitioned dataset."""

    class aDataset(af.Dataset):
        """Document me!"""

        v1 = af.VectorI8("abc")
        v2 = af.VectorString("xyz")
        LOCATION = af.Location(folder=".", partition_by=["v1"])

    # v1 is the partition column, so it is expected under partitions_types
    # and not under columns_types.
    expected = dict(
        database="bd",
        table="desk",
        path=".",
        columns_types={"v2": "string"},
        partitions_types={"v1": "tinyint"},
        compression=None,
        description="Document me!",
        columns_comments={"v1": "abc", "v2": "xyz"},
    )
    actual = aDataset().kwargs_for_create_athena_table(db="bd", table="desk")
    assert actual == expected
353+
354+
332355
@pytest.mark.skipif(NO_PYARROW, reason="pyarrow is not installed")
333356
def test_objects_as_metadata():
334357
class aDataset(af.Dataset):

0 commit comments

Comments
 (0)