Skip to content

Commit d1719c4

Browse files
authored
Merge pull request #24 from ihmeuw-msca/bugfix/metrics-include_groups
Bugfix/metrics include groups
2 parents d75afb8 + 48fd8d0 commit d1719c4

File tree

4 files changed

+112
-2
lines changed

4 files changed

+112
-2
lines changed

pyproject.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ dependencies = [
2222
"scipy",
2323
"pydantic",
2424
"scikit-learn",
25+
"pandas",
2526
]
2627

2728
[project.optional-dependencies]

src/msca/metrics/main.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
from enum import StrEnum, auto
2+
23
import numpy as np
34
import pandas as pd
4-
55
from sklearn import metrics
66

77

@@ -246,7 +246,6 @@ def _eval_grouped(
246246
obs,
247247
pred,
248248
weights,
249-
include_groups=False,
250249
)
251250
.reset_index()
252251
)

tests/metrics/__init__.py

Whitespace-only changes.

tests/metrics/test_metrics.py

Lines changed: 110 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,110 @@
1+
import numpy as np
2+
import pandas as pd
3+
import pytest
4+
from sklearn import metrics
5+
6+
from msca.metrics import Metric # Replace with actual import path
7+
8+
9+
@pytest.fixture
def sample_data():
    """Four-row frame with observed/predicted columns and a two-level grouping key."""
    columns = {
        "obs": [1.0, 2.0, 3.0, 4.0],
        "pred": [1.1, 1.9, 3.2, 3.8],
        "pred_alt": [1.2, 2.1, 3.1, 3.9],
        "pred_ref": [1.1, 2.0, 3.1, 4.0],
        "weights": [1.0, 1.0, 1.0, 1.0],
        "region": ["A", "A", "B", "B"],
    }
    return pd.DataFrame(columns)
21+
22+
23+
@pytest.mark.parametrize(
    "metric",
    [
        Metric.MEAN_ABSOLUTE_ERROR,
        Metric.MEAN_SQUARED_ERROR,
        Metric.MEAN_ABSOLUTE_PERCENTAGE_ERROR,
        Metric.MEDIAN_ABSOLUTE_ERROR,
        Metric.ROOT_MEAN_SQUARED_ERROR,
    ],
)
def test_eval_single_metric(metric, sample_data):
    """Ungrouped evaluation returns a single non-negative float for every metric."""
    value = metric.eval(sample_data, "obs", "pred", "weights")
    assert isinstance(value, float)
    assert value >= 0
37+
38+
39+
@pytest.mark.parametrize(
    "metric_enum",
    [
        Metric.MEAN_ABSOLUTE_ERROR,
        Metric.MEAN_SQUARED_ERROR,
    ],
)
def test_eval_grouped(metric_enum, sample_data):
    """Grouped evaluation yields a DataFrame with one row per region and a named metric column."""
    grouped = metric_enum.eval(
        sample_data, "obs", "pred", "weights", groupby=["region"]
    )
    expected_col = f"pred_{metric_enum.value}"
    assert isinstance(grouped, pd.DataFrame)
    assert "region" in grouped.columns
    assert expected_col in grouped.columns
    assert len(grouped) == sample_data["region"].nunique()
55+
56+
57+
def test_eval_skill_single(sample_data):
    """Ungrouped skill score is a float bounded above by 1."""
    skill = Metric.MEAN_ABSOLUTE_ERROR.eval_skill(
        sample_data, "obs", "pred_alt", "pred_ref", "weights"
    )
    assert isinstance(skill, float)
    assert skill <= 1  # skill score range
64+
65+
66+
def test_eval_skill_grouped(sample_data):
    """Grouped skill evaluation returns a frame keyed by region with a skill column."""
    mae = Metric.MEAN_ABSOLUTE_ERROR
    result = mae.eval_skill(
        sample_data,
        "obs",
        "pred_alt",
        "pred_ref",
        "weights",
        groupby=["region"],
    )
    expected_col = f"pred_alt_{mae.value}_skill"
    assert isinstance(result, pd.DataFrame)
    assert "region" in result.columns
    assert expected_col in result.columns
80+
81+
82+
def test_eval_skill_zero_division_grouped(sample_data):
    """A perfect reference (zero reference error) makes the grouped skill score undefined."""
    # Make obs == pred_ref so the reference MAE is exactly zero in every group.
    sample_data["pred_ref"] = sample_data["obs"]
    with pytest.raises(ZeroDivisionError):
        Metric.MEAN_ABSOLUTE_ERROR.eval_skill(
            sample_data,
            "obs",
            "pred_alt",
            "pred_ref",
            "weights",
            groupby=["region"],
        )
97+
98+
99+
def test_eval_skill_zero_division_single(sample_data):
    """A perfect reference (zero reference error) makes the ungrouped skill score undefined."""
    sample_data["pred_ref"] = sample_data["obs"]  # reference MAE becomes zero
    with pytest.raises(ZeroDivisionError):
        Metric.MEAN_ABSOLUTE_ERROR.eval_skill(
            sample_data, "obs", "pred_alt", "pred_ref", "weights"
        )
105+
106+
107+
def test_eval_single_unsupported_metric(sample_data):
    """Constructing a Metric from an unknown name raises ValueError.

    NOTE(review): the previous version also called ``_eval_single`` inside
    the ``raises`` block, but that line was unreachable — ``Metric("fake")``
    raises ``ValueError`` first (standard enum lookup behavior), so
    ``_eval_single`` was never exercised. The dead call is removed to keep
    the test honest about what it covers.
    """
    with pytest.raises(ValueError):
        Metric("fake")

0 commit comments

Comments
 (0)