Skip to content

Commit 6b38cde

Browse files
authored
Merge pull request #16 from vertti/config-strict
Config strict
2 parents 064edf0 + 24fad27 commit 6b38cde

File tree

6 files changed

+190
-10
lines changed

6 files changed

+190
-10
lines changed

README.md

Lines changed: 33 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
![test](https://github.com/fourkind/daffy/workflows/test/badge.svg)
55
[![codecov](https://codecov.io/gh/vertti/daffy/graph/badge.svg?token=00OL75TW4W)](https://codecov.io/gh/vertti/daffy)
66

7-
## Description
7+
## Description
88

99
Working with DataFrames often means passing them through multiple transformation functions, making it easy to lose track of their structure over time. DAFFY adds runtime validation and documentation to your DataFrame operations through simple decorators. By declaring the expected columns and types in your function definitions, you can:
1010

@@ -35,7 +35,7 @@ Install with your favorite Python dependency manager like
3535
pip install daffy
3636
```
3737

38-
## Usage
38+
## Usage
3939

4040
Start by importing the needed decorators:
4141

@@ -127,6 +127,27 @@ will, when `car_df` contains columns `["Brand", "Price"]` raise an error:
127127
AssertionError: DataFrame contained unexpected column(s): Price
128128
```
129129

130+
### Project-wide Configuration
131+
132+
You can set the default value for strict mode at the project level by adding a `[tool.daffy]` section to your `pyproject.toml` file:
133+
134+
```toml
135+
[tool.daffy]
136+
strict = true
137+
```
138+
139+
When this configuration is present, all `@df_in` and `@df_out` decorators will use strict mode by default. You can still override this setting on individual decorators:
140+
141+
```python
142+
# Uses strict=true from project config
143+
@df_in(columns=["Brand"])
144+
# Explicitly disable strict mode for this decorator
145+
@df_out(columns=["Brand", "FilteredPrice"], strict=False)
146+
def filter_cars(car_df):
147+
# filter some cars
148+
return filtered_cars_df
149+
```
150+
130151
To quickly check what the incoming and outgoing dataframes contain, you can add a `@df_log` annotation to the function. For
131152
example adding `@df_log` to the above `filter_cars` function will produce log lines:
132153

@@ -164,6 +185,15 @@ MIT
164185

165186
## Changelog
166187

188+
### 0.9.4
189+
190+
- Fix to strict flag loading when tool config was missing
191+
192+
### 0.9.3
193+
194+
- Add configuration system to set default strict mode in pyproject.toml
195+
- Improve logging when multiple columns are missing
196+
167197
### 0.9.2
168198

169199
- Add explicit `__all__` export for functions to make Mypy happy
@@ -212,7 +242,7 @@ MIT
212242

213243
### 0.2.1
214244

215-
- Added Pypi classifiers.
245+
- Added Pypi classifiers.
216246

217247
### 0.2.0
218248

daffy/config.py

Lines changed: 89 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,89 @@
1+
"""Configuration handling for DAFFY."""
2+
3+
import os
4+
from typing import Optional
5+
6+
import tomli
7+
8+
9+
def load_config() -> dict:
    """
    Load daffy configuration from pyproject.toml.

    The file to read is whatever find_config_file() returns. Loading is
    best-effort: a missing or unreadable file, invalid TOML, or a
    ``[tool.daffy]`` entry with an unexpected shape all result in the
    built-in defaults being returned instead of an exception.

    Returns:
        dict: Configuration dictionary with daffy settings
    """
    config = {"strict": False}

    config_path = find_config_file()
    if not config_path:
        return config

    # Keep the try body limited to the I/O and parsing that can actually fail.
    # OSError covers FileNotFoundError as well as permission problems;
    # UnicodeDecodeError is raised by tomli.load for non-UTF-8 content.
    try:
        with open(config_path, "rb") as f:
            pyproject = tomli.load(f)
    except (OSError, UnicodeDecodeError, tomli.TOMLDecodeError):
        return config

    # "tool" and "tool.daffy" are normally tables, but nothing stops a
    # pyproject.toml from defining them as scalars; treat that as "no config".
    tool_table = pyproject.get("tool", {})
    daffy_config = tool_table.get("daffy", {}) if isinstance(tool_table, dict) else {}
    if not isinstance(daffy_config, dict):
        return config

    # Only accept a real TOML boolean. Coercing with bool() would turn a
    # mistyped `strict = "false"` into True, silently enabling strict mode.
    strict = daffy_config.get("strict")
    if isinstance(strict, bool):
        config["strict"] = strict

    return config
37+
38+
39+
def find_config_file() -> Optional[str]:
    """
    Locate the pyproject.toml of the project that is currently running.

    Only the current working directory is inspected; parent directories and
    daffy's own installation directory are never searched.

    Returns:
        str or None: Path to pyproject.toml if it exists as a regular file
        in the current working directory, None otherwise
    """
    # The working directory is expected to be the user's project root.
    candidate = os.path.join(os.getcwd(), "pyproject.toml")
    return candidate if os.path.isfile(candidate) else None
58+
59+
60+
# Holds the loaded configuration after the first get_config() call so that
# pyproject.toml is not re-read on every lookup. None means "not loaded yet".
_config_cache: Optional[dict] = None


def get_config() -> dict:
    """
    Return the daffy configuration, reading it on first use.

    The result of load_config() is stored in the module-level cache and the
    same dictionary is handed out on every subsequent call.

    Returns:
        dict: Configuration dictionary with daffy settings
    """
    global _config_cache
    cached = _config_cache
    if cached is not None:
        return cached
    _config_cache = load_config()
    return _config_cache
75+
76+
77+
def get_strict(strict_param: Optional[bool] = None) -> bool:
    """
    Resolve the effective strict mode setting.

    An explicitly passed value always wins; the configuration is consulted
    only when no value was given.

    Args:
        strict_param: Explicitly provided strict parameter value, or None to use config

    Returns:
        bool: The effective strict mode setting
    """
    if strict_param is None:
        # Nothing was passed by the caller: fall back to the configured default.
        return bool(get_config()["strict"])
    return strict_param

daffy/decorators.py

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,8 @@
88
import pandas as pd
99
import polars as pl
1010

11+
from daffy.config import get_strict
12+
1113
ColumnsDef = Union[List, Dict]
1214
DataFrameType = Union[pd.DataFrame, pl.DataFrame]
1315

@@ -42,14 +44,15 @@ def _check_columns(df: DataFrameType, columns: ColumnsDef, strict: bool) -> None
4244
raise AssertionError(f"DataFrame contained unexpected column(s): {', '.join(extra_columns)}")
4345

4446

45-
def df_out(columns: Optional[ColumnsDef] = None, strict: bool = False) -> Callable:
47+
def df_out(columns: Optional[ColumnsDef] = None, strict: Optional[bool] = None) -> Callable:
4648
"""Decorate a function that returns a Pandas DataFrame.
4749
4850
Document the return value of a function. The return value will be validated at runtime.
4951
5052
Args:
5153
columns (ColumnsDef, optional): List or dict that describes expected columns of the DataFrame. Defaults to None.
52-
strict (bool, optional): If True, columns must match exactly with no extra columns. Defaults to False.
54+
strict (bool, optional): If True, columns must match exactly with no extra columns.
55+
If None, uses the value from [tool.daffy] strict setting in pyproject.toml.
5356
5457
Returns:
5558
Callable: Decorated function
@@ -63,7 +66,7 @@ def wrapper(*args: str, **kwargs: Any) -> Any:
6366
f"Wrong return type. Expected DataFrame, got {type(result)}"
6467
)
6568
if columns:
66-
_check_columns(result, columns, strict)
69+
_check_columns(result, columns, get_strict(strict))
6770
return result
6871

6972
return wrapper
@@ -87,15 +90,16 @@ def _get_parameter(func: Callable, name: Optional[str] = None, *args: str, **kwa
8790
return kwargs[name]
8891

8992

90-
def df_in(name: Optional[str] = None, columns: Optional[ColumnsDef] = None, strict: bool = False) -> Callable:
93+
def df_in(name: Optional[str] = None, columns: Optional[ColumnsDef] = None, strict: Optional[bool] = None) -> Callable:
9194
"""Decorate a function parameter that is a Pandas DataFrame.
9295
9396
Document the contents of an input parameter. The parameter will be validated at runtime.
9497
9598
Args:
9699
name (Optional[str], optional): Name of the parameter that contains a DataFrame. Defaults to None.
97100
columns (ColumnsDef, optional): List or dict that describes expected columns of the DataFrame. Defaults to None.
98-
strict (bool, optional): If True, columns must match exactly with no extra columns. Defaults to False.
101+
strict (bool, optional): If True, columns must match exactly with no extra columns.
102+
If None, uses the value from [tool.daffy] strict setting in pyproject.toml.
99103
100104
Returns:
101105
Callable: Decorated function
@@ -109,7 +113,7 @@ def wrapper(*args: str, **kwargs: Any) -> Any:
109113
f"Wrong parameter type. Expected DataFrame, got {type(df).__name__} instead."
110114
)
111115
if columns:
112-
_check_columns(df, columns, strict)
116+
_check_columns(df, columns, get_strict(strict))
113117
return func(*args, **kwargs)
114118

115119
return wrapper

mise.toml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
[tools]
2+
node = "latest"
3+
poetry = "latest"

pyproject.toml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[project]
22
name = "daffy"
3-
version = "0.9.2"
3+
version = "0.9.4"
44
description = "Function decorators for Pandas and Polars Dataframe column name and data type validation"
55
authors = [
66
{ name="Janne Sinivirta", email="[email protected]" },
@@ -36,6 +36,7 @@ include = ["daffy/py.typed"]
3636
python = ">=3.9.0,<4.0.0"
3737
pandas = ">=1.5.1,<3.0.0"
3838
polars = "^1.7.0"
39+
tomli = "^2.0.0"
3940

4041
[tool.poetry.group.dev.dependencies]
4142
pytest = "^8.3.0"

tests/test_config.py

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
"""Tests for the daffy configuration system."""
2+
3+
import os
4+
import tempfile
5+
from unittest.mock import patch
6+
7+
from daffy.config import get_config, get_strict
8+
9+
10+
def test_get_config_default() -> None:
    """Test that get_config returns default values when no config file is found."""
    # get_config() caches its result in daffy.config._config_cache. Start from
    # an empty cache so the patched find_config_file is actually consulted;
    # patch() restores the previous cache value afterwards, so this test does
    # not leak state into other tests.
    with patch("daffy.config._config_cache", None), patch("daffy.config.find_config_file", return_value=None):
        config = get_config()
        assert config["strict"] is False
15+
16+
17+
def test_get_strict_default() -> None:
    """Test that get_strict falls back to the configured value when called without an argument."""
    for configured in (False, True):
        with patch("daffy.config.get_config", return_value={"strict": configured}):
            assert get_strict() is configured
24+
25+
26+
def test_get_strict_override() -> None:
    """Test that an explicitly passed value wins over the configured one."""
    # Each pair is (value in config, value passed explicitly).
    for configured, explicit in ((False, True), (True, False)):
        with patch("daffy.config.get_config", return_value={"strict": configured}):
            assert get_strict(explicit) is explicit
33+
34+
35+
def test_config_from_pyproject() -> None:
    """Test loading configuration from pyproject.toml."""
    # Imported here because the module-level import only brings in
    # get_config and get_strict.
    from daffy.config import load_config

    with tempfile.TemporaryDirectory() as tmpdir:
        # Create a mock pyproject.toml file
        with open(os.path.join(tmpdir, "pyproject.toml"), "w") as f:
            f.write("""
[tool.daffy]
strict = true
""")

        # Make the config lookup see the temporary directory as the cwd
        with patch("daffy.config.os.getcwd", return_value=tmpdir):
            # load_config() always reads the file, so no cache handling is needed
            config = load_config()
            assert config["strict"] is True

            # The cached accessor must pick up the same value. The cache lives
            # in daffy.config (not in this test module's globals), so reset it
            # there; patch() restores the previous value when the block exits.
            with patch("daffy.config._config_cache", None):
                assert get_config()["strict"] is True

0 commit comments

Comments
 (0)