Skip to content

Commit 23b2e0f

Browse files
authored
Merge pull request #11 from vertti/polars-support
Polars support
2 parents 59ba620 + 151383b commit 23b2e0f

File tree

4 files changed

+196
-105
lines changed

4 files changed

+196
-105
lines changed

README.md

Lines changed: 13 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -7,21 +7,19 @@
77

88
## Description
99

10-
In projects using Pandas, it's very common to have functions that take Pandas DataFrames as input or produce them as output.
11-
It's hard to figure out quickly what these DataFrames contain. This library offers simple decorators to annotate your functions
12-
so that they document themselves, and that documentation is kept up to date by validating the input and output at runtime.
13-
14-
For example,
10+
Working with DataFrames often means passing them through multiple transformation functions, making it easy to lose track of their structure over time. DAFFY adds runtime validation and documentation to your DataFrame operations through simple decorators. By declaring the expected columns and types in your function definitions, you make each function document and validate its own inputs and outputs:
1511

1612
```python
17-
@df_in(columns=["Brand", "Price"]) # the function expects a DataFrame as input parameter with columns Brand and Price
18-
@df_out(columns=["Brand", "Price"]) # the function will return a DataFrame with columns Brand and Price
19-
def filter_cars(car_df):
20-
# before this code is executed, the input DataFrame is validated according to the above decorator
21-
# filter some cars..
22-
return filtered_cars_df
13+
@df_in(columns=["price", "bedrooms", "location"])
14+
@df_out(columns=["price_per_room", "price_category"])
15+
def analyze_housing(houses_df):
16+
# Transform raw housing data into price analysis
17+
return analyzed_df
2318
```
2419

20+
Like type hints for DataFrames, DAFFY helps you catch structural mismatches early and keeps your data pipeline documentation synchronized with the code. Compatible with both Pandas and Polars.
21+
22+
2523
## Table of Contents
2624
* [Installation](#installation)
2725
* [Usage](#usage)
@@ -167,6 +165,10 @@ MIT
167165

168166
## Changelog
169167

168+
### 0.8.0
169+
170+
- Support Polars DataFrames
171+
170172
### 0.7.0
171173

172174
- Support Pandas 2.x

daffy/decorators.py

Lines changed: 17 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -6,11 +6,13 @@
66
from typing import Any, Callable, Dict, List, Optional, Union
77

88
import pandas as pd
9+
import polars as pl
910

1011
ColumnsDef = Union[List, Dict]
12+
DataFrameType = Union[pd.DataFrame, pl.DataFrame]
1113

1214

13-
def _check_columns(df: pd.DataFrame, columns: ColumnsDef, strict: bool) -> None:
15+
def _check_columns(df: DataFrameType, columns: ColumnsDef, strict: bool) -> None:
1416
if isinstance(columns, list):
1517
for column in columns:
1618
assert column in df.columns, f"Column {column} missing from DataFrame. Got {_describe_pd(df)}"
@@ -43,7 +45,9 @@ def wrapper_df_out(func: Callable) -> Callable:
4345
@wraps(func)
4446
def wrapper(*args: str, **kwargs: Any) -> Any:
4547
result = func(*args, **kwargs)
46-
assert isinstance(result, pd.DataFrame), f"Wrong return type. Expected pandas dataframe, got {type(result)}"
48+
assert isinstance(result, pd.DataFrame) or isinstance(result, pl.DataFrame), (
49+
f"Wrong return type. Expected DataFrame, got {type(result)}"
50+
)
4751
if columns:
4852
_check_columns(result, columns, strict)
4953
return result
@@ -53,7 +57,7 @@ def wrapper(*args: str, **kwargs: Any) -> Any:
5357
return wrapper_df_out
5458

5559

56-
def _get_parameter(func: Callable, name: Optional[str] = None, *args: str, **kwargs: Any) -> pd.DataFrame:
60+
def _get_parameter(func: Callable, name: Optional[str] = None, *args: str, **kwargs: Any) -> DataFrameType:
5761
if not name:
5862
if len(args) == 0:
5963
return None
@@ -85,8 +89,8 @@ def wrapper_df_in(func: Callable) -> Callable:
8589
@wraps(func)
8690
def wrapper(*args: str, **kwargs: Any) -> Any:
8791
df = _get_parameter(func, name, *args, **kwargs)
88-
assert isinstance(df, pd.DataFrame), (
89-
f"Wrong parameter type. Expected Pandas DataFrame, got {type(df).__name__} instead."
92+
assert isinstance(df, pd.DataFrame) or isinstance(df, pl.DataFrame), (
93+
f"Wrong parameter type. Expected DataFrame, got {type(df).__name__} instead."
9094
)
9195
if columns:
9296
_check_columns(df, columns, strict)
@@ -97,24 +101,27 @@ def wrapper(*args: str, **kwargs: Any) -> Any:
97101
return wrapper_df_in
98102

99103

100-
def _describe_pd(df: pd.DataFrame, include_dtypes: bool = False) -> str:
104+
def _describe_pd(df: DataFrameType, include_dtypes: bool = False) -> str:
101105
result = f"columns: {list(df.columns)}"
102106
if include_dtypes:
103-
readable_dtypes = [dtype.name for dtype in df.dtypes]
104-
result += f" with dtypes {readable_dtypes}"
107+
if isinstance(df, pd.DataFrame):
108+
readable_dtypes = [dtype.name for dtype in df.dtypes]
109+
result += f" with dtypes {readable_dtypes}"
110+
if isinstance(df, pl.DataFrame):
111+
result += f" with dtypes {df.dtypes}"
105112
return result
106113

107114

108115
def _log_input(level: int, func_name: str, df: Any, include_dtypes: bool) -> None:
109-
if isinstance(df, pd.DataFrame):
116+
if isinstance(df, pd.DataFrame) or isinstance(df, pl.DataFrame):
110117
logging.log(
111118
level,
112119
f"Function {func_name} parameters contained a DataFrame: {_describe_pd(df, include_dtypes)}",
113120
)
114121

115122

116123
def _log_output(level: int, func_name: str, df: Any, include_dtypes: bool) -> None:
117-
if isinstance(df, pd.DataFrame):
124+
if isinstance(df, pd.DataFrame) or isinstance(df, pl.DataFrame):
118125
logging.log(
119126
level,
120127
f"Function {func_name} returned a DataFrame: {_describe_pd(df, include_dtypes)}",

pyproject.toml

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
[project]
22
name = "daffy"
3-
version = "0.7.0"
4-
description = "Function decorators for Pandas Dataframe column name and data type validation"
3+
version = "0.8.0"
4+
description = "Function decorators for Pandas and Polars Dataframe column name and data type validation"
55
authors = [
66
{ name="Janne Sinivirta", email="[email protected]" },
77
]
@@ -32,6 +32,7 @@ include = [
3232
[tool.poetry.dependencies]
3333
python = ">=3.9.0,<4.0.0"
3434
pandas = ">=1.5.1,<3.0.0"
35+
polars = "^1.7.0"
3536

3637
[tool.poetry.group.dev.dependencies]
3738
pytest = "^7.4.3"

0 commit comments

Comments
 (0)