Merge pull request #25 from vertti/parameter-names-logging

vertti · web-flow · commit 37d2dac6c926 · 2025-06-19T13:12:46.000+03:00
Include parameter names in df_in error messages
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -2,6 +2,10 @@
 
 All notable changes to this project will be documented in this file.
 
+## 0.14.0
+
+- Improve `df_in` error messages (missing columns, dtype mismatches, unexpected columns) to include the name of the offending parameter
+
 ## 0.13.2
 
 - Updated urls for Pypi site compatibility
diff --git a/daffy/decorators.py b/daffy/decorators.py
@@ -54,7 +54,9 @@ def _compile_regex_patterns(columns: Seq[Any]) -> List[Union[str, RegexColumnDef
     return result
 
 
-def _check_columns(df: DataFrameType, columns: Union[ColumnsList, ColumnsDict], strict: bool) -> None:
+def _check_columns(
+    df: DataFrameType, columns: Union[ColumnsList, ColumnsDict], strict: bool, param_name: Optional[str] = None
+) -> None:
     missing_columns = []
     dtype_mismatches = []
     matched_by_regex = set()
@@ -113,11 +115,16 @@ def _check_columns(df: DataFrameType, columns: Union[ColumnsList, ColumnsDict],
                             dtype_mismatches.append((matched_col, df[matched_col].dtype, dtype))
 
     if missing_columns:
-        raise AssertionError(f"Missing columns: {missing_columns}. Got {_describe_pd(df)}")
+        param_info = f" in parameter '{param_name}'" if param_name else ""
+        raise AssertionError(f"Missing columns: {missing_columns}{param_info}. Got {_describe_pd(df)}")
 
     if dtype_mismatches:
+        param_info = f" in parameter '{param_name}'" if param_name else ""
         mismatches = ", ".join(
-            [f"Column {col} has wrong dtype. Was {was}, expected {expected}" for col, was, expected in dtype_mismatches]
+            [
+                f"Column {col}{param_info} has wrong dtype. Was {was}, expected {expected}"
+                for col, was, expected in dtype_mismatches
+            ]
         )
         raise AssertionError(mismatches)
 
@@ -134,7 +141,8 @@ def _check_columns(df: DataFrameType, columns: Union[ColumnsList, ColumnsDict],
             extra_columns = set(df.columns) - allowed_columns
 
         if extra_columns:
-            raise AssertionError(f"DataFrame contained unexpected column(s): {', '.join(extra_columns)}")
+            param_info = f" in parameter '{param_name}'" if param_name else ""
+            raise AssertionError(f"DataFrame{param_info} contained unexpected column(s): {', '.join(extra_columns)}")
 
 
 def df_out(
@@ -189,6 +197,26 @@ def _get_parameter(func: Callable[..., Any], name: Optional[str] = None, *args:
     return kwargs[name]
 
 
+def _get_parameter_name(
+    func: Callable[..., Any], name: Optional[str] = None, *args: Any, **kwargs: Any
+) -> Optional[str]:
+    """Get the actual parameter name being validated."""
+    if name:
+        return name
+
+    # If no name specified, try to get the first parameter name
+    if len(args) > 0:
+        # Get the first parameter name from the function signature
+        func_params_in_order = list(inspect.signature(func).parameters.keys())
+        if func_params_in_order:
+            return func_params_in_order[0]
+    elif kwargs:
+        # Return the first keyword argument name
+        return next(iter(kwargs.keys()))
+
+    return None
+
+
 def df_in(
     name: Optional[str] = None, columns: Union[ColumnsList, ColumnsDict, None] = None, strict: Optional[bool] = None
 ) -> Callable[[Callable[..., R]], Callable[..., R]]:
@@ -214,11 +242,12 @@ def wrapper_df_in(func: Callable[..., R]) -> Callable[..., R]:
         @wraps(func)
         def wrapper(*args: Any, **kwargs: Any) -> R:
             df = _get_parameter(func, name, *args, **kwargs)
+            param_name = _get_parameter_name(func, name, *args, **kwargs)
             assert isinstance(df, pd.DataFrame) or isinstance(df, pl.DataFrame), (
                 f"Wrong parameter type. Expected DataFrame, got {type(df).__name__} instead."
             )
             if columns:
-                _check_columns(df, columns, get_strict(strict))
+                _check_columns(df, columns, get_strict(strict), param_name)
             return func(*args, **kwargs)
 
         return wrapper
diff --git a/pyproject.toml b/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "daffy"
-version = "0.13.2"
+version = "0.14.0"
 description = "Function decorators for Pandas and Polars Dataframe column name and data type validation"
 authors = [
  { name="Janne Sinivirta", email="janne.sinivirta@gmail.com" },
diff --git a/tests/test_df_in.py b/tests/test_df_in.py
@@ -104,7 +104,7 @@ def test_fn(my_input: Any, _df: DataFrameType) -> DataFrameType:
     with pytest.raises(AssertionError) as excinfo:
         test_fn("foo", _df=df)
 
-    assert "DataFrame contained unexpected column(s): Price" in str(excinfo.value)
+    assert "DataFrame in parameter '_df' contained unexpected column(s): Price" in str(excinfo.value)
 
 
 def test_correct_input_with_columns_and_dtypes_pandas(basic_pandas_df: pd.DataFrame) -> None:
@@ -131,7 +131,7 @@ def test_fn(my_input: Any) -> Any:
     with pytest.raises(AssertionError) as excinfo:
         test_fn(basic_pandas_df)
 
-    assert "Column Price has wrong dtype. Was int64, expected float64" in str(excinfo.value)
+    assert "Column Price in parameter 'my_input' has wrong dtype. Was int64, expected float64" in str(excinfo.value)
 
 
 def test_dtype_mismatch_polars(basic_polars_df: pl.DataFrame) -> None:
@@ -142,7 +142,7 @@ def test_fn(my_input: Any) -> Any:
     with pytest.raises(AssertionError) as excinfo:
         test_fn(basic_polars_df)
 
-    assert "Column Price has wrong dtype. Was Int64, expected Float64" in str(excinfo.value)
+    assert "Column Price in parameter 'my_input' has wrong dtype. Was Int64, expected Float64" in str(excinfo.value)
 
 
 @pytest.mark.parametrize(("df"), [pd.DataFrame(cars), pl.DataFrame(cars)])
@@ -153,7 +153,7 @@ def test_fn(my_input: Any) -> Any:
 
     with pytest.raises(AssertionError) as excinfo:
         test_fn(df[["Brand"]])
-    assert "Missing columns: ['Price']. Got columns: ['Brand']" in str(excinfo.value)
+    assert "Missing columns: ['Price'] in parameter 'my_input'. Got columns: ['Brand']" in str(excinfo.value)
 
 
 @pytest.mark.parametrize(("df"), [pd.DataFrame(cars), pl.DataFrame(cars)])
@@ -164,7 +164,7 @@ def test_fn(my_input: Any) -> Any:
 
     with pytest.raises(AssertionError) as excinfo:
         test_fn(df[["Brand"]])
-    assert "Missing columns: ['Price', 'Extra']. Got columns: ['Brand']" in str(excinfo.value)
+    assert "Missing columns: ['Price', 'Extra'] in parameter 'my_input'. Got columns: ['Brand']" in str(excinfo.value)
 
 
 @pytest.mark.parametrize(
@@ -254,7 +254,7 @@ def test_fn(my_input: Any) -> Any:
     with pytest.raises(AssertionError) as excinfo:
         test_fn(df)
 
-    assert "DataFrame contained unexpected column(s): Price" in str(excinfo.value)
+    assert "DataFrame in parameter 'my_input' contained unexpected column(s): Price" in str(excinfo.value)
 
 
 def test_regex_column_with_dtype_pandas(basic_pandas_df: pd.DataFrame) -> None:
@@ -287,7 +287,7 @@ def test_fn(my_input: Any) -> Any:
     with pytest.raises(AssertionError) as excinfo:
         test_fn(df)
 
-    assert "Column Price_2 has wrong dtype. Was float64, expected int64" in str(excinfo.value)
+    assert "Column Price_2 in parameter 'my_input' has wrong dtype. Was float64, expected int64" in str(excinfo.value)
 
 
 def test_regex_column_with_dtype_polars(basic_polars_df: pl.DataFrame) -> None:
@@ -303,3 +303,22 @@ def test_fn(my_input: Any) -> Any:
     result = test_fn(df)
     assert "Price_1" in result.columns
     assert "Price_2" in result.columns
+
+
+@pytest.mark.parametrize(
+    ("basic_df,extended_df"),
+    [(pd.DataFrame(cars), pd.DataFrame(extended_cars)), (pl.DataFrame(cars), pl.DataFrame(extended_cars))],
+)
+def test_multiple_parameters_error_identification(basic_df: DataFrameType, extended_df: DataFrameType) -> None:
+    """Test that we can identify which parameter has the issue when multiple dataframes are used."""
+
+    @df_in(name="cars", columns=["Brand", "Price"], strict=True)
+    @df_in(name="ext_cars", columns=["Brand", "Price", "Year", "NonExistent"], strict=True)
+    def test_fn(cars: DataFrameType, ext_cars: DataFrameType) -> int:
+        return len(cars) + len(ext_cars)
+
+    # Test missing column in second parameter
+    with pytest.raises(AssertionError) as excinfo:
+        test_fn(cars=basic_df, ext_cars=extended_df)
+
+    assert "Missing columns: ['NonExistent'] in parameter 'ext_cars'" in str(excinfo.value)