Skip to content

Commit b86ac37

Browse files
authored
Merge pull request #33 from vertti/improved-error-messages
Improve Exception Messages for Better Debugging
2 parents ec7e84d + bcae942 commit b86ac37

File tree

8 files changed

+177
-26
lines changed

8 files changed

+177
-26
lines changed

CHANGELOG.md

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,13 @@
22

33
All notable changes to this project will be documented in this file.
44

5+
## 0.15.0
6+
7+
- Exception messages now include function names to improve debugging
8+
- Input validation: `"Missing columns: ['Col'] in function 'my_func' parameter 'param'. Got columns: ['Other']"`
9+
- Return value validation messages now clearly state "return value" instead of just showing the function name
10+
- Output validation: `"Missing columns: ['Col'] in function 'my_func' return value. Got columns: ['Other']"`
11+
512
## 0.14.2
613

714
- Internal code quality improvements

daffy/decorators.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@ def wrapper(*args: Any, **kwargs: Any) -> DF:
5050
result = func(*args, **kwargs)
5151
assert_is_dataframe(result, "return type")
5252
if columns:
53-
validate_dataframe(result, columns, get_strict(strict))
53+
validate_dataframe(result, columns, get_strict(strict), None, func.__name__, True)
5454
return result
5555

5656
return wrapper
@@ -86,7 +86,7 @@ def wrapper(*args: Any, **kwargs: Any) -> R:
8686
param_name = get_parameter_name(func, name, *args, **kwargs)
8787
assert_is_dataframe(df, "parameter type")
8888
if columns:
89-
validate_dataframe(df, columns, get_strict(strict), param_name)
89+
validate_dataframe(df, columns, get_strict(strict), param_name, func.__name__)
9090
return func(*args, **kwargs)
9191

9292
return wrapper

daffy/utils.py

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,8 +19,21 @@ def assert_is_dataframe(obj: Any, context: str) -> None:
1919
raise AssertionError(f"Wrong {context}. Expected DataFrame, got {type(obj).__name__} instead.")
2020

2121

22-
def format_param_context(param_name: Optional[str]) -> str:
23-
return f" in parameter '{param_name}'" if param_name else ""
22+
def format_param_context(
23+
param_name: Optional[str], func_name: Optional[str] = None, is_return_value: bool = False
24+
) -> str:
25+
context_parts = []
26+
if func_name:
27+
context_parts.append(f"function '{func_name}'")
28+
29+
if is_return_value:
30+
context_parts.append("return value")
31+
elif param_name:
32+
context_parts.append(f"parameter '{param_name}'")
33+
34+
if context_parts:
35+
return f" in {' '.join(context_parts)}"
36+
return ""
2437

2538

2639
def get_parameter(func: Callable[..., Any], name: Optional[str] = None, *args: Any, **kwargs: Any) -> Any:

daffy/validation.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,12 @@ def _find_dtype_mismatches(
4747

4848

4949
def validate_dataframe(
50-
df: DataFrameType, columns: Union[ColumnsList, ColumnsDict], strict: bool, param_name: Optional[str] = None
50+
df: DataFrameType,
51+
columns: Union[ColumnsList, ColumnsDict],
52+
strict: bool,
53+
param_name: Optional[str] = None,
54+
func_name: Optional[str] = None,
55+
is_return_value: bool = False,
5156
) -> None:
5257
df_columns = list(df.columns) # Cache the column list conversion
5358
all_missing_columns = []
@@ -69,7 +74,7 @@ def validate_dataframe(
6974
all_dtype_mismatches.extend(_find_dtype_mismatches(column_spec, df, expected_dtype, df_columns))
7075
all_matched_by_regex.update(find_regex_matches(column_spec, df_columns))
7176

72-
param_info = format_param_context(param_name)
77+
param_info = format_param_context(param_name, func_name, is_return_value)
7378

7479
if all_missing_columns:
7580
raise AssertionError(f"Missing columns: {all_missing_columns}{param_info}. Got {describe_dataframe(df)}")

docs/usage.md

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -48,9 +48,11 @@ def get_all_cars():
4848
In case one of the listed columns is missing from the DataFrame, a helpful assertion error is raised:
4949

5050
```python
51-
AssertionError("Column Price missing from DataFrame. Got columns: ['Brand']")
51+
AssertionError("Missing columns: ['Price'] in function 'get_all_cars' return value. Got columns: ['Brand']")
5252
```
5353

54+
The error message clearly indicates that this is a **return value** validation failure in the function `get_all_cars`.
55+
5456
## Combined Validation
5557

5658
To check both input and output, just use both annotations on the same function:
@@ -63,6 +65,10 @@ def filter_cars(car_df):
6365
return filtered_cars_df
6466
```
6567

68+
Note that error messages will clearly distinguish between input and output validation failures:
69+
- Input validation: `"Missing columns: ['Price'] in function 'filter_cars' parameter 'car_df'. Got columns: ['Brand']"`
70+
- Output validation: `"Missing columns: ['Price'] in function 'filter_cars' return value. Got columns: ['Brand']"`
71+
6672
## Column Pattern Matching with Regex
6773

6874
You can use regex patterns to match column names that follow a specific pattern. This is useful when working with dynamic column names or when dealing with many similar columns.
@@ -83,7 +89,7 @@ In this example:
8389
If no columns match a regex pattern, an error is raised:
8490

8591
```
86-
AssertionError: Missing columns: ['r/Price_\d+/']. Got columns: ['Brand', 'Model']
92+
AssertionError: Missing columns: ['r/Price_\d+/'] in function 'process_data' parameter 'df'. Got columns: ['Brand', 'Model']
8793
```
8894

8995
Regex patterns are also considered in strict mode. Any column matching a regex pattern is considered valid.
@@ -105,7 +111,7 @@ columns={"Brand": "object", "Price": "int64"}
105111
This will not only check that the specified columns are found in the DataFrame but also that their `dtype` is the expected one. In case of a wrong `dtype`, an error message similar to the following will explain the mismatch:
106112

107113
```
108-
AssertionError("Column Price has wrong dtype. Was int64, expected float64")
114+
AssertionError("Column Price in function 'process_cars' parameter 'car_df' has wrong dtype. Was int64, expected float64")
109115
```
110116

111117
### Combining Regex Patterns with Data Type Validation
@@ -126,7 +132,7 @@ In this example:
126132
If a column matches the regex pattern but has the wrong dtype, an error is raised:
127133

128134
```
129-
AssertionError: Column Price_2 has wrong dtype. Was float64, expected int64
135+
AssertionError: Column Price_2 in function 'process_data' parameter 'df' has wrong dtype. Was float64, expected int64
130136
```
131137

132138
## Strict Mode
@@ -142,7 +148,7 @@ def process_cars(car_df):
142148
will, when `car_df` contains columns `["Brand", "Price"]`, raise an error:
143149

144150
```
145-
AssertionError: DataFrame contained unexpected column(s): Price
151+
AssertionError: DataFrame in function 'process_cars' parameter 'car_df' contained unexpected column(s): Price
146152
```
147153

148154
## Project-wide Configuration

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[project]
22
name = "daffy"
3-
version = "0.14.2"
3+
version = "0.15.0"
44
description = "Function decorators for Pandas and Polars Dataframe column name and data type validation"
55
authors = [
66
{ name="Janne Sinivirta", email="[email protected]" },

tests/test_df_in.py

Lines changed: 55 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -106,7 +106,7 @@ def test_fn(my_input: Any, _df: DataFrameType) -> DataFrameType:
106106
with pytest.raises(AssertionError) as excinfo:
107107
test_fn("foo", _df=df)
108108

109-
assert "DataFrame in parameter '_df' contained unexpected column(s): Price" in str(excinfo.value)
109+
assert "DataFrame in function 'test_fn' parameter '_df' contained unexpected column(s): Price" in str(excinfo.value)
110110

111111

112112
def test_correct_input_with_columns_and_dtypes_pandas(basic_pandas_df: pd.DataFrame) -> None:
@@ -133,7 +133,10 @@ def test_fn(my_input: Any) -> Any:
133133
with pytest.raises(AssertionError) as excinfo:
134134
test_fn(basic_pandas_df)
135135

136-
assert "Column Price in parameter 'my_input' has wrong dtype. Was int64, expected float64" in str(excinfo.value)
136+
assert (
137+
"Column Price in function 'test_fn' parameter 'my_input' has wrong dtype. Was int64, expected float64"
138+
in str(excinfo.value)
139+
)
137140

138141

139142
def test_dtype_mismatch_polars(basic_polars_df: pl.DataFrame) -> None:
@@ -144,7 +147,10 @@ def test_fn(my_input: Any) -> Any:
144147
with pytest.raises(AssertionError) as excinfo:
145148
test_fn(basic_polars_df)
146149

147-
assert "Column Price in parameter 'my_input' has wrong dtype. Was Int64, expected Float64" in str(excinfo.value)
150+
assert (
151+
"Column Price in function 'test_fn' parameter 'my_input' has wrong dtype. Was Int64, expected Float64"
152+
in str(excinfo.value)
153+
)
148154

149155

150156
@pytest.mark.parametrize(("df"), [pd.DataFrame(cars), pl.DataFrame(cars)])
@@ -155,7 +161,9 @@ def test_fn(my_input: Any) -> Any:
155161

156162
with pytest.raises(AssertionError) as excinfo:
157163
test_fn(df[["Brand"]])
158-
assert "Missing columns: ['Price'] in parameter 'my_input'. Got columns: ['Brand']" in str(excinfo.value)
164+
assert "Missing columns: ['Price'] in function 'test_fn' parameter 'my_input'. Got columns: ['Brand']" in str(
165+
excinfo.value
166+
)
159167

160168

161169
@pytest.mark.parametrize(("df"), [pd.DataFrame(cars), pl.DataFrame(cars)])
@@ -166,7 +174,10 @@ def test_fn(my_input: Any) -> Any:
166174

167175
with pytest.raises(AssertionError) as excinfo:
168176
test_fn(df[["Brand"]])
169-
assert "Missing columns: ['Price', 'Extra'] in parameter 'my_input'. Got columns: ['Brand']" in str(excinfo.value)
177+
assert (
178+
"Missing columns: ['Price', 'Extra'] in function 'test_fn' parameter 'my_input'. Got columns: ['Brand']"
179+
in str(excinfo.value)
180+
)
170181

171182

172183
@pytest.mark.parametrize(
@@ -256,7 +267,9 @@ def test_fn(my_input: Any) -> Any:
256267
with pytest.raises(AssertionError) as excinfo:
257268
test_fn(df)
258269

259-
assert "DataFrame in parameter 'my_input' contained unexpected column(s): Price" in str(excinfo.value)
270+
assert "DataFrame in function 'test_fn' parameter 'my_input' contained unexpected column(s): Price" in str(
271+
excinfo.value
272+
)
260273

261274

262275
def test_regex_column_with_dtype_pandas(basic_pandas_df: pd.DataFrame) -> None:
@@ -289,7 +302,10 @@ def test_fn(my_input: Any) -> Any:
289302
with pytest.raises(AssertionError) as excinfo:
290303
test_fn(df)
291304

292-
assert "Column Price_2 in parameter 'my_input' has wrong dtype. Was float64, expected int64" in str(excinfo.value)
305+
assert (
306+
"Column Price_2 in function 'test_fn' parameter 'my_input' has wrong dtype. Was float64, expected int64"
307+
in str(excinfo.value)
308+
)
293309

294310

295311
def test_regex_column_with_dtype_polars(basic_polars_df: pl.DataFrame) -> None:
@@ -323,7 +339,7 @@ def test_fn(cars: DataFrameType, ext_cars: DataFrameType) -> int:
323339
with pytest.raises(AssertionError) as excinfo:
324340
test_fn(cars=basic_df, ext_cars=extended_df)
325341

326-
assert "Missing columns: ['NonExistent'] in parameter 'ext_cars'" in str(excinfo.value)
342+
assert "Missing columns: ['NonExistent'] in function 'test_fn' parameter 'ext_cars'" in str(excinfo.value)
327343

328344

329345
def test_check_columns_handles_invalid_column_type_in_list() -> None:
@@ -384,3 +400,34 @@ def test_fn(df: pd.DataFrame) -> pd.DataFrame:
384400
df = pd.DataFrame({"A": [1, 2], "B": [3, 4]})
385401
result = test_fn(df)
386402
assert result is not None
403+
404+
405+
def test_function_name_appears_in_missing_columns_exception() -> None:
406+
@df_in(columns=["Brand", "NonExistentColumn"])
407+
def my_test_function(df: pd.DataFrame) -> pd.DataFrame:
408+
return df
409+
410+
df = pd.DataFrame({"Brand": ["Toyota", "Honda"]})
411+
412+
with pytest.raises(AssertionError) as excinfo:
413+
my_test_function(df)
414+
415+
# Should include function name in the exception message
416+
assert "my_test_function" in str(excinfo.value)
417+
assert "Missing columns: ['NonExistentColumn']" in str(excinfo.value)
418+
419+
420+
def test_function_name_appears_in_dtype_mismatch_exception() -> None:
421+
@df_in(columns={"Brand": "object", "Price": "float64"})
422+
def another_test_function(df: pd.DataFrame) -> pd.DataFrame:
423+
return df
424+
425+
df = pd.DataFrame({"Brand": ["Toyota"], "Price": [100]}) # Price is int64, not float64
426+
427+
with pytest.raises(AssertionError) as excinfo:
428+
another_test_function(df)
429+
430+
# Should include function name in the exception message
431+
assert "another_test_function" in str(excinfo.value)
432+
assert "Column Price" in str(excinfo.value)
433+
assert "wrong dtype" in str(excinfo.value)

tests/test_df_out.py

Lines changed: 79 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,7 @@ def test_fn() -> DataFrameType:
6464
with pytest.raises(AssertionError) as excinfo:
6565
test_fn()
6666

67-
assert "DataFrame contained unexpected column(s): Price" in str(excinfo.value)
67+
assert "DataFrame in function 'test_fn' return value contained unexpected column(s): Price" in str(excinfo.value)
6868

6969

7070
@pytest.mark.parametrize(("df"), [pd.DataFrame(cars), pl.DataFrame(cars)])
@@ -76,7 +76,9 @@ def test_fn() -> DataFrameType:
7676
with pytest.raises(AssertionError) as excinfo:
7777
test_fn()
7878

79-
assert "Missing columns: ['FooColumn']. Got columns: ['Brand', 'Price']" in str(excinfo.value)
79+
assert "Missing columns: ['FooColumn'] in function 'test_fn' return value. Got columns: ['Brand', 'Price']" in str(
80+
excinfo.value
81+
)
8082

8183

8284
def test_df_out_with_df_modification(basic_pandas_df: pd.DataFrame, extended_pandas_df: pd.DataFrame) -> None:
@@ -116,7 +118,7 @@ def test_fn() -> pd.DataFrame:
116118
with pytest.raises(AssertionError) as excinfo:
117119
test_fn()
118120

119-
assert "Missing columns: ['r/NonExistent_[0-9]/']" in str(excinfo.value)
121+
assert "Missing columns: ['r/NonExistent_[0-9]/'] in function 'test_fn' return value" in str(excinfo.value)
120122

121123

122124
def test_regex_column_pattern_with_strict_in_output(basic_pandas_df: pd.DataFrame) -> None:
@@ -130,7 +132,7 @@ def test_fn() -> pd.DataFrame:
130132
with pytest.raises(AssertionError) as excinfo:
131133
test_fn()
132134

133-
assert "DataFrame contained unexpected column(s): Price" in str(excinfo.value)
135+
assert "DataFrame in function 'test_fn' return value contained unexpected column(s): Price" in str(excinfo.value)
134136

135137

136138
def test_regex_column_with_dtype_in_output_pandas(basic_pandas_df: pd.DataFrame) -> None:
@@ -161,7 +163,9 @@ def test_fn() -> pd.DataFrame:
161163
with pytest.raises(AssertionError) as excinfo:
162164
test_fn()
163165

164-
assert "Column Price_2 has wrong dtype. Was float64, expected int64" in str(excinfo.value)
166+
assert "Column Price_2 in function 'test_fn' return value has wrong dtype. Was float64, expected int64" in str(
167+
excinfo.value
168+
)
165169

166170

167171
def test_regex_column_with_dtype_in_output_polars(basic_polars_df: pl.DataFrame) -> None:
@@ -190,4 +194,73 @@ def test_fn() -> pd.DataFrame:
190194
with pytest.raises(AssertionError) as excinfo:
191195
test_fn()
192196

193-
assert "DataFrame contained unexpected column(s): Price" in str(excinfo.value)
197+
assert "DataFrame in function 'test_fn' return value contained unexpected column(s): Price" in str(excinfo.value)
198+
199+
200+
def test_function_name_appears_in_missing_columns_exception_output() -> None:
201+
@df_out(columns=["Brand", "NonExistentColumn"])
202+
def my_output_function() -> pd.DataFrame:
203+
return pd.DataFrame({"Brand": ["Toyota", "Honda"]})
204+
205+
with pytest.raises(AssertionError) as excinfo:
206+
my_output_function()
207+
208+
# Should include function name in the exception message
209+
assert "my_output_function" in str(excinfo.value)
210+
assert "Missing columns: ['NonExistentColumn']" in str(excinfo.value)
211+
212+
213+
def test_function_name_appears_in_dtype_mismatch_exception_output() -> None:
214+
@df_out(columns={"Brand": "object", "Price": "float64"})
215+
def another_output_function() -> pd.DataFrame:
216+
return pd.DataFrame({"Brand": ["Toyota"], "Price": [100]}) # Price is int64, not float64
217+
218+
with pytest.raises(AssertionError) as excinfo:
219+
another_output_function()
220+
221+
# Should include function name in the exception message
222+
assert "another_output_function" in str(excinfo.value)
223+
assert "Column Price" in str(excinfo.value)
224+
assert "wrong dtype" in str(excinfo.value)
225+
226+
227+
def test_return_value_validation_clearly_states_return_value_missing_columns() -> None:
228+
@df_out(columns=["Brand", "MissingColumn"])
229+
def function_with_return_validation() -> pd.DataFrame:
230+
return pd.DataFrame({"Brand": ["Toyota", "Honda"]})
231+
232+
with pytest.raises(AssertionError) as excinfo:
233+
function_with_return_validation()
234+
235+
# Should clearly state this is a return value validation failure
236+
assert "return value" in str(excinfo.value)
237+
assert "function_with_return_validation" in str(excinfo.value)
238+
assert "Missing columns: ['MissingColumn']" in str(excinfo.value)
239+
240+
241+
def test_return_value_validation_clearly_states_return_value_dtype_mismatch() -> None:
242+
@df_out(columns={"Brand": "object", "Price": "float64"})
243+
def function_with_dtype_validation() -> pd.DataFrame:
244+
return pd.DataFrame({"Brand": ["Toyota"], "Price": [100]}) # Price is int64, not float64
245+
246+
with pytest.raises(AssertionError) as excinfo:
247+
function_with_dtype_validation()
248+
249+
# Should clearly state this is a return value validation failure
250+
assert "return value" in str(excinfo.value)
251+
assert "function_with_dtype_validation" in str(excinfo.value)
252+
assert "wrong dtype" in str(excinfo.value)
253+
254+
255+
def test_return_value_validation_clearly_states_return_value_extra_columns() -> None:
256+
@df_out(columns=["Brand"], strict=True)
257+
def function_with_extra_columns() -> pd.DataFrame:
258+
return pd.DataFrame({"Brand": ["Toyota"], "Price": [100]})
259+
260+
with pytest.raises(AssertionError) as excinfo:
261+
function_with_extra_columns()
262+
263+
# Should clearly state this is a return value validation failure
264+
assert "return value" in str(excinfo.value)
265+
assert "function_with_extra_columns" in str(excinfo.value)
266+
assert "unexpected column(s): Price" in str(excinfo.value)

0 commit comments

Comments
 (0)