Albrja/mic-6334/Rename strata to stratifications (#75)

albrja · web-flow · commit 31324425f8f0 · 2025-08-28T15:48:51.000-07:00
Albrja/mic-6334/Rename strata to stratifications. Rename uses of "strata" to "stratifications" to keep naming consistent. - *Category*: Refactor - *JIRA issue*: https://jira.ihme.washington.edu/browse/MIC-6334 Changes and notes: - Rename strata to stratifications. ### Testing
diff --git a/src/vivarium_testing_utils/automated_validation/comparison.py b/src/vivarium_testing_utils/automated_validation/comparison.py
@@ -284,9 +284,9 @@ def _align_datasets(
                 for x in self.reference_data.index.names
                 if x not in reference_indexes_to_drop
             ]
-        aggregated_reference_data = self._aggregate_strata_reference(
+        aggregated_reference_data = self._aggregate_reference_stratifications(
             self.reference_data,
-            strata=stratifications,
+            stratifications=stratifications,
         )
 
         # If the test data has any index levels that are not in the reference data, marginalize
@@ -323,23 +323,25 @@ def _align_datasets(
         ## At this point, the only non-common index levels should be scenarios and draws.
         return stratified_test_data, aggregated_reference_data
 
-    def _aggregate_strata_reference(
-        self, data: pd.DataFrame, strata: Collection[str] = ()
+    def _aggregate_reference_stratifications(
+        self, data: pd.DataFrame, stratifications: Collection[str] = ()
     ) -> pd.DataFrame:
-        for stratum in strata:
+        for stratification in stratifications:
             if (
-                stratum not in data.index.names
-                and stratum not in self.reference_weights.index.names
+                stratification not in data.index.names
+                and stratification not in self.reference_weights.index.names
             ):
                 raise ValueError(
-                    f"Stratum '{stratum}' not found in reference data or weights."
+                    f"Stratum '{stratification}' not found in reference data or weights."
                 )
 
-        strata = list(strata)
+        stratifications = list(stratifications)
         # Retain input_draw, _aggregate_over_draws is the only place we should aggregate over draws.
-        if DRAW_INDEX in data.index.names and DRAW_INDEX not in strata:
-            strata.append(DRAW_INDEX)
-        weighted_avg = calculations.weighted_average(data, self.reference_weights, strata)
+        if DRAW_INDEX in data.index.names and DRAW_INDEX not in stratifications:
+            stratifications.append(DRAW_INDEX)
+        weighted_avg = calculations.weighted_average(
+            data, self.reference_weights, stratifications
+        )
         # Reference data can be a float or dataframe. Convert floats so dataframes are aligned
         if not isinstance(weighted_avg, pd.DataFrame):
             weighted_avg = pd.DataFrame(
diff --git a/tests/automated_validation/test_comparison.py b/tests/automated_validation/test_comparison.py
@@ -456,13 +456,13 @@ def test_fuzzy_comparison_align_datasets_calculation(
     )
 
 
-def test_aggregate_strata(
+def test_aggregate_stratifications(
     mock_ratio_measure: RatioMeasure,
     test_data: dict[str, pd.DataFrame],
     reference_data: pd.DataFrame,
     reference_weights: pd.DataFrame,
 ) -> None:
-    """Test that aggregate_strata correctly aggregates data."""
+    """Test that aggregate_stratifications correctly aggregates data."""
     comparison = FuzzyComparison(
         mock_ratio_measure,
         DataSource.SIM,
@@ -472,7 +472,9 @@ def test_aggregate_strata(
         reference_weights,
     )
 
-    aggregated = comparison._aggregate_strata_reference(reference_data, ["age", "sex"])
+    aggregated = comparison._aggregate_reference_stratifications(
+        reference_data, ["age", "sex"]
+    )
     # (0, Male) = (0.12 * 0.15 + 0.29 * 0.35) / (0.15 + 0.35)
     expected = pd.DataFrame(
         {
@@ -493,7 +495,7 @@ def test_aggregate_strata(
     pd.testing.assert_frame_equal(aggregated, expected)
 
     with pytest.raises(ValueError, match="not found in reference data or weights"):
-        comparison._aggregate_strata_reference(reference_data, ["dog", "cat"])
+        comparison._aggregate_reference_stratifications(reference_data, ["dog", "cat"])
 
 
 def _add_draws_to_dataframe(df: pd.DataFrame, draw_values: list[int]) -> pd.DataFrame: