Commit f8d5d1c

better optional dependencies handling; release

1 parent 8af9b3b

3 files changed: +38 -47 lines


affinity.py

Lines changed: 25 additions & 23 deletions
@@ -8,28 +8,32 @@
 from time import time
 from typing import TYPE_CHECKING, List, Optional, Tuple

+import duckdb
 import numpy as np
 import pandas as pd


-def try_import(module: str) -> object | None:
-    try:
-        return import_module(module)
-    except ImportError:
-        # print(f"{module} not found in the current environment")
-        return
+class _modules:
+    """Stores modules imported conditionally."""
+
+    def try_import(modules: List[str]) -> None:
+        """Conditional imports."""
+        for module in modules:
+            try:
+                _module = import_module(module)
+                globals()[module] = _module  # used here
+                setattr(_modules, module, _module)  # used in tests
+            except ImportError:
+                setattr(_modules, module, False)


 if TYPE_CHECKING:
-    import duckdb  # type: ignore
-    import polars as pl  # type: ignore
-    import pyarrow as pa  # type: ignore
-    import pyarrow.parquet as pq  # type: ignore
+    import awswrangler  # type: ignore
+    import polars  # type: ignore
+    import pyarrow  # type: ignore
+    import pyarrow.parquet  # type: ignore
 else:
-    duckdb = try_import("duckdb")
-    pl = try_import("polars")
-    pa = try_import("pyarrow")
-    pq = try_import("pyarrow.parquet")
+    _modules.try_import(["awswrangler", "polars", "pyarrow", "pyarrow.parquet"])


 @dataclass
@@ -213,7 +217,7 @@ def build(cls, query=None, dataframe=None, **kwargs):

     @classmethod
     def from_dataframe(
-        cls, dataframe: pd.DataFrame | Optional["pl.DataFrame"], **kwargs
+        cls, dataframe: pd.DataFrame | Optional["polars.DataFrame"], **kwargs
     ):
         instance = cls()
         for i, k in enumerate(dict(instance)):
@@ -237,9 +241,7 @@ def from_sql(cls, query: str, **kwargs):
     @property
     def athena_types(self):
         """Convert pandas types to SQL types for loading into AWS Athena."""
-
-        wr = try_import("awswrangler")
-        columns_types, partition_types = wr.catalog.extract_athena_types(
+        columns_types, partition_types = awswrangler.catalog.extract_athena_types(
             df=self.df,
             partition_cols=self.LOCATION.partition_by,
         )
@@ -365,17 +367,17 @@ def df4(self) -> pd.DataFrame:
         return self.df

     @property
-    def arrow(self) -> "pa.Table":
+    def arrow(self) -> "pyarrow.Table":
         metadata = {str(k): str(v) for k, v in self.metadata.items()}
         _dict = {
             k: [v.dict for v in vector] if self.is_dataset(k) else vector
             for k, vector in self
         }
-        return pa.table(_dict, metadata=metadata)
+        return pyarrow.table(_dict, metadata=metadata)

     @property
-    def pl(self) -> "pl.DataFrame":
-        return pl.DataFrame(dict(self))
+    def pl(self) -> "polars.DataFrame":
+        return polars.DataFrame(dict(self))

     def is_dataset(self, key):
         attr = getattr(self, key, None)
@@ -428,7 +430,7 @@ def to_parquet(self, path, engine="duckdb", **kwargs):
         if engine == "pandas":
             self.df.to_parquet(path)
         elif engine == "arrow":
-            pq.write_table(self.arrow, path)
+            pyarrow.parquet.write_table(self.arrow, path)
         elif engine == "duckdb":
             kv_metadata = []
             for k, v in self.metadata.items():
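
In short: the per-call try_import helper, which returned a module or None, is replaced by a _modules registry that imports each optional dependency once at module load, publishes it into globals() for use inside affinity.py, and records False on the class for anything unavailable, so tests can probe availability with a truthiness check. A minimal standalone sketch of that pattern, using stdlib json and a deliberately bogus name as stand-ins for the real optional dependencies:

from importlib import import_module


class _modules:
    """Registry of conditionally imported modules; missing ones become False."""

    def try_import(modules):
        for module in modules:
            try:
                _module = import_module(module)
                globals()[module] = _module         # usable in this module's code
                setattr(_modules, module, _module)  # probeable from outside
            except ImportError:
                setattr(_modules, module, False)


_modules.try_import(["json", "definitely_not_installed"])

print(bool(_modules.json))                      # True: import succeeded
print(bool(_modules.definitely_not_installed))  # False: safe to skip dependent code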

pyproject.toml

Lines changed: 4 additions & 2 deletions
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"

 [project]
 name = "affinity"
-version = "1.0.0"
+version = "1.0.1"
 description = "Module for creating well-documented datasets, with types and annotations."
 authors = [
     { name = "Alex Kislukhin" }
@@ -13,7 +13,6 @@ readme = "README.md"
 requires-python = ">=3.11"

 dependencies = [
-    "awswrangler>=3.10.1",
     "duckdb>=1",
     "pandas",
 ]
@@ -24,6 +23,9 @@ dev = [
     "pyarrow>=17",
     "pytest>=8",
 ]
+aws = [
+    "awswrangler>=3.10.1",
+]

 [tool.hatch.build]
 include = [
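
Moving awswrangler out of the core dependencies list into an aws optional-dependency group makes the AWS support opt-in. Assuming the package is published under the name declared in [project], users who need the Athena integration would install it with:

    pip install "affinity[aws]"

Everyone else gets a lighter install, and the import-time registry above degrades gracefully when awswrangler is absent.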

test_affinity.py

Lines changed: 9 additions & 22 deletions
@@ -11,20 +11,6 @@
 # https://github.com/duckdb/duckdb/issues/14179
 duckdb.sql("SET python_scan_all_frames=true")

-try:
-    import polars  # noqa: F401
-
-    NO_POLARS = False
-except ImportError:
-    NO_POLARS = True
-
-try:
-    import pyarrow
-
-    NO_PYARROW = False
-except ImportError:
-    NO_PYARROW = True
-

 def test_location_default():
     loc = af.Location()
@@ -266,8 +252,8 @@ class aDataset(af.Dataset):
     pd.testing.assert_frame_equal(data.df, source_df.astype(default_dtypes))


-@pytest.mark.skipif(NO_POLARS, reason="polars is not installed")
-@pytest.mark.skipif(NO_PYARROW, reason="pyarrow is not installed")
+@pytest.mark.skipif(not af._modules.polars, reason="polars is not installed")
+@pytest.mark.skipif(not af._modules.pyarrow, reason="pyarrow is not installed")
 def test_to_polars():
     class aDataset(af.Dataset):
         v1 = af.VectorBool("")
@@ -280,7 +266,7 @@ class aDataset(af.Dataset):
     assert str(polars_df.dtypes) == "[Boolean, Float32, Int16]"


-@pytest.mark.skipif(NO_PYARROW, reason="pyarrow is not installed")
+@pytest.mark.skipif(not af._modules.pyarrow, reason="pyarrow is not installed")
 def test_to_pyarrow():
     class aDataset(af.Dataset):
         v1 = af.VectorBool("")
@@ -329,6 +315,7 @@ class cDataset(af.Dataset):
     cDataset().sql("SELECT v2 FROM df")  # "df" != last test's data_a.df


+@pytest.mark.skipif(not af._modules.awswrangler, reason="awswrangler is not installed")
 def test_kwargs_for_create_athena_table():
     class aDataset(af.Dataset):
         """Document me!"""
@@ -352,7 +339,7 @@ class aDataset(af.Dataset):
     }


-@pytest.mark.skipif(NO_PYARROW, reason="pyarrow is not installed")
+@pytest.mark.skipif(not af._modules.pyarrow, reason="pyarrow is not installed")
 def test_objects_as_metadata():
     class aDataset(af.Dataset):
         """Objects other than strings can go into metadata."""
@@ -369,7 +356,7 @@ def try_ast_literal_eval(x: str):
     data = aDataset(v1=[True], v2=[1 / 2], v3=[3])
     test_file_arrow = Path("test_arrow.parquet")
     data.to_parquet(test_file_arrow, engine="arrow")
-    pf = pyarrow.parquet.ParquetFile(test_file_arrow)
+    pf = af._modules.pyarrow.parquet.ParquetFile(test_file_arrow)
     pf_metadata = pf.schema_arrow.metadata
     decoded_metadata = {
         k.decode(): try_ast_literal_eval(v.decode()) for k, v in pf_metadata.items()
@@ -378,8 +365,8 @@ def try_ast_literal_eval(x: str):
     assert decoded_metadata.get("v2") == aDataset.v2.comment


-@pytest.mark.skipif(NO_POLARS, reason="polars is not installed")
-@pytest.mark.skipif(NO_PYARROW, reason="pyarrow is not installed")
+@pytest.mark.skipif(not af._modules.polars, reason="polars is not installed")
+@pytest.mark.skipif(not af._modules.pyarrow, reason="pyarrow is not installed")
 def test_to_parquet_with_metadata():
     class aDataset(af.Dataset):
         """Delightful data."""
@@ -441,7 +428,7 @@ class KeyValueMetadata(af.Dataset):
     )


-@pytest.mark.skipif(NO_PYARROW, reason="pyarrow is not installed")
+@pytest.mark.skipif(not af._modules.pyarrow, reason="pyarrow is not installed")
 def test_parquet_roundtrip_with_rename():
     class IsotopeData(af.Dataset):
         symbol = af.VectorObject("Element")
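
On the test side, the old NO_POLARS/NO_PYARROW flags become redundant: because _modules stores False for anything that failed to import, each skip condition collapses to a plain truthiness check on af._modules. When many tests share the same guard, a pytest marker can be bound once and reused; a hedged sketch of that idiom (the requires_pyarrow name is illustrative, not part of this commit):

import pytest

import affinity as af

# Hypothetical reusable marker built on the new registry; not in this commit.
requires_pyarrow = pytest.mark.skipif(
    not af._modules.pyarrow, reason="pyarrow is not installed"
)


@requires_pyarrow
def test_roundtrip_example():
    ...  # body elided; runs only when pyarrow imported successfully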
