Albrja/mic 6247/specific rate aggregation weights (#70)

albrja · web-flow · commit 2324a274430a · 2025-08-11T09:28:55.000-07:00
Albrja/mic 6247/specific rate aggregation weights. Add specific rate aggregation weights implementation. - *Category*: Implementation - *JIRA issue*: https://jira.ihme.washington.edu/browse/MIC-6247 - *Changes and notes*: Add specific implementations of rate aggregation weights for all RatioMeasure classes except PopulationStructure.
diff --git a/src/vivarium_testing_utils/automated_validation/data_transformation/measures.py b/src/vivarium_testing_utils/automated_validation/data_transformation/measures.py
@@ -25,6 +25,7 @@
 )
 from vivarium_testing_utils.automated_validation.data_transformation.rate_aggregation import (
     RateAggregationWeights,
+    population_weighted,
 )
 
 
@@ -172,12 +173,13 @@ class Incidence(RatioMeasure):
 
     @property
     def rate_aggregation_weights(self) -> RateAggregationWeights:
-        """Returns rated aggregated weights."""
+        """Returns rate aggregated weights."""
         return RateAggregationWeights(
             weight_keys={
                 "population": "population.structure",
                 "prevalence": f"cause.{self.entity}.prevalence",
             },
+            # TODO: Update formula to account for having more than two states. Only works for SI and SIS models.
             formula=lambda population, prevalence: population * (1 - prevalence),
             description="Person-time × (1 - prevalence) weighted average",
         )
@@ -201,8 +203,8 @@ class Prevalence(RatioMeasure):
 
     @property
     def rate_aggregation_weights(self) -> RateAggregationWeights:
-        """Will be implemented with MIC-6247."""
-        raise NotImplementedError
+        """Returns rate aggregated weights."""
+        return population_weighted()
 
     def __init__(self, cause: str) -> None:
         super().__init__(
@@ -223,8 +225,15 @@ class SIRemission(RatioMeasure):
 
     @property
     def rate_aggregation_weights(self) -> RateAggregationWeights:
-        """Will be implemented with MIC-6247."""
-        raise NotImplementedError
+        """Returns rate aggregated weights."""
+        return RateAggregationWeights(
+            weight_keys={
+                "population": "population.structure",
+                "prevalence": f"cause.{self.entity}.prevalence",
+            },
+            formula=lambda population, prevalence: population * prevalence,
+            description="Person-time × prevalence weighted average",
+        )
 
     def __init__(self, cause: str) -> None:
         super().__init__(
@@ -245,8 +254,8 @@ class CauseSpecificMortalityRate(RatioMeasure):
 
     @property
     def rate_aggregation_weights(self) -> RateAggregationWeights:
-        """Will be implemented with MIC-6247."""
-        raise NotImplementedError
+        """Returns rate aggregated weights."""
+        return population_weighted()
 
     def __init__(self, cause: str) -> None:
         super().__init__(
@@ -267,8 +276,15 @@ class ExcessMortalityRate(RatioMeasure):
 
     @property
     def rate_aggregation_weights(self) -> RateAggregationWeights:
-        """Will be implemented with MIC-6247."""
-        raise NotImplementedError
+        """Returns rate aggregated weights."""
+        return RateAggregationWeights(
+            weight_keys={
+                "population": "population.structure",
+                "prevalence": f"cause.{self.entity}.prevalence",
+            },
+            formula=lambda population, prevalence: population * prevalence,
+            description="Person-time × prevalence weighted average",
+        )
 
     def __init__(self, cause: str) -> None:
         super().__init__(
@@ -296,7 +312,7 @@ class PopulationStructure(RatioMeasure):
 
     @property
     def rate_aggregation_weights(self) -> RateAggregationWeights:
-        """Will be implemented with MIC-6247."""
+        """This will be implemented when we refactor and implement DataBundle (MIC-6241)."""
         raise NotImplementedError
 
     def __init__(self, scenario_columns: list[str]):
@@ -347,8 +363,8 @@ class RiskExposure(RatioMeasure):
 
     @property
     def rate_aggregation_weights(self) -> RateAggregationWeights:
-        """Will be implemented with MIC-6247."""
-        raise NotImplementedError
+        """Returns rate aggregated weights."""
+        return population_weighted()
 
     def __init__(self, risk_factor: str) -> None:
         super().__init__(
@@ -426,8 +442,8 @@ def artifact_datasets(self) -> dict[str, str]:
 
     @property
     def rate_aggregation_weights(self) -> RateAggregationWeights:
-        """Will be implemented with MIC-6247."""
-        raise NotImplementedError
+        """Returns rate aggregated weights."""
+        return self.affected_measure.rate_aggregation_weights
 
     @utils.check_io(
         relative_risks=SingleNumericColumn,
diff --git a/src/vivarium_testing_utils/automated_validation/data_transformation/rate_aggregation.py b/src/vivarium_testing_utils/automated_validation/data_transformation/rate_aggregation.py
@@ -22,3 +22,11 @@ class RateAggregationWeights:
     @utils.check_io(out=SingleNumericColumn)
     def get_weights(self, *args: Any, **kwargs: Any) -> pd.DataFrame:
         return self.formula(*args, **kwargs)
+
+
+def population_weighted() -> RateAggregationWeights:
+    return RateAggregationWeights(
+        weight_keys={"population": "population.structure"},
+        formula=lambda population: population,
+        description="Population-weighted average",
+    )
diff --git a/tests/automated_validation/data_transformation/test_measures.py b/tests/automated_validation/data_transformation/test_measures.py
@@ -579,40 +579,130 @@ def test_format_title() -> None:
     assert _format_title("measure.entity") == "Measure Entity"
 
 
-def test_rate_aggregation_weights() -> None:
-    """Test the rate_aggregation_weights property of Incidence measure."""
-    cause = "disease"
-    measure = Incidence(cause)
-
+@pytest.mark.parametrize(
+    "measure_class,measure_args,expected_weights_config,expected_description",
+    [
+        (
+            Incidence,
+            ("disease",),
+            {
+                "population": "population.structure",
+                "prevalence": "cause.disease.prevalence",
+            },
+            "Person-time × (1 - prevalence) weighted average",
+        ),
+        (
+            Prevalence,
+            ("disease",),
+            {"population": "population.structure"},
+            "Population-weighted average",
+        ),
+        (
+            SIRemission,
+            ("disease",),
+            {
+                "population": "population.structure",
+                "prevalence": "cause.disease.prevalence",
+            },
+            "Person-time × prevalence weighted average",
+        ),
+        (
+            CauseSpecificMortalityRate,
+            ("disease",),
+            {"population": "population.structure"},
+            "Population-weighted average",
+        ),
+        (
+            ExcessMortalityRate,
+            ("disease",),
+            {
+                "population": "population.structure",
+                "prevalence": "cause.disease.prevalence",
+            },
+            "Person-time × prevalence weighted average",
+        ),
+        (
+            RiskExposure,
+            ("child_stunting",),
+            {"population": "population.structure"},
+            "Population-weighted average",
+        ),
+        (
+            CategoricalRelativeRisk,
+            (
+                "risky_risk",
+                "disease",
+                "excess_mortality_rate",
+                "common_stratify_column",
+                None,
+            ),
+            {
+                "population": "population.structure",
+                "prevalence": "cause.disease.prevalence",
+            },
+            "Person-time × prevalence weighted average",
+        ),
+        (
+            PopulationStructure,
+            (["scenario"],),
+            None,  # Not used since it raises NotImplementedError
+            None,  # Not used since it raises NotImplementedError
+        ),
+    ],
+)
+def test_rate_aggregation_weights(
+    measure_class: type[RatioMeasure],
+    measure_args: tuple[str],
+    expected_weights_config: dict[str, str] | None,
+    expected_description: str | None,
+) -> None:
+    """Test the rate_aggregation_weights property of various RatioMeasure subclasses."""
+    # Create the measure instance
+    measure = measure_class(*measure_args)  # type: ignore[call-arg]
+
+    if isinstance(measure, PopulationStructure):
+        # Test that PopulationStructure raises NotImplementedError
+        with pytest.raises(NotImplementedError):
+            _ = measure.rate_aggregation_weights
+        return
+
+    assert expected_weights_config is not None
+    assert expected_description is not None
     # Get the rate aggregation weights
     rate_agg_weights = measure.rate_aggregation_weights
-
     # Verify the configuration
-    expected_keys = {
-        "population": "population.structure",
-        "prevalence": f"cause.{cause}.prevalence",
-    }
-    assert rate_agg_weights.weight_keys == expected_keys
-    assert rate_agg_weights.description == "Person-time × (1 - prevalence) weighted average"
+    assert rate_agg_weights.weight_keys == expected_weights_config
+    assert rate_agg_weights.description == expected_description
 
     # Create test data matching expected format
     test_index = pd.MultiIndex.from_tuples(
         [("A", "baseline"), ("B", "baseline")], names=["common_stratify_column", "scenario"]
     )
-
     # Population structure data (proportions summing to 1)
     population_data = get_expected_dataframe(0.6, 0.4)
-    # Prevalence data (proportions between 0 and 1)
-    prevalence_data = get_expected_dataframe(0.1, 0.2)
-
-    # Test get_weights with keyword arguments
-    weights = rate_agg_weights.get_weights(
-        population=population_data, prevalence=prevalence_data
-    )
+    # Mock data from artifact
+    key_data = get_expected_dataframe(0.1, 0.2)
 
-    # Expected calculation: population * (1 - prevalence)
-    expected_weights = pd.DataFrame(
-        {"value": [0.6 * (1 - 0.1), 0.4 * (1 - 0.2)]}, index=test_index  # [0.54, 0.32]
-    )
+    if len(rate_agg_weights.weight_keys) > 1:
+        weights = rate_agg_weights.get_weights(population_data, key_data)
+    else:
+        weights = rate_agg_weights.get_weights(population_data)
+
+    # Expected calculation depends on the measure type
+    if "prevalence" in expected_weights_config:
+        if "1 - prevalence" in expected_description:
+            # Incidence: population * (1 - prevalence)
+            expected_weights = pd.DataFrame(
+                {"value": [0.6 * (1 - 0.1), 0.4 * (1 - 0.2)]},
+                index=test_index,  # [0.54, 0.32]
+            )
+        else:
+            # SIRemission and ExcessMortalityRate: population * prevalence
+            expected_weights = pd.DataFrame(
+                {"value": [0.6 * 0.1, 0.4 * 0.2]}, index=test_index  # [0.06, 0.08]
+            )
+    else:
+        # Population weighted measures: just population
+        expected_weights = population_data
 
     pd.testing.assert_frame_equal(weights, expected_weights)