add metric prediction summary to cross validation analysis

bletham · meta-codesync[bot] · commit e32c80c57e8b · 2026-03-03T09:56:12.000-08:00
Differential Revision: D94553707
diff --git a/ax/analysis/diagnostics.py b/ax/analysis/diagnostics.py
@@ -11,6 +11,7 @@
 from ax.analysis.analysis import Analysis
 from ax.analysis.graphviz.generation_strategy_graph import GenerationStrategyGraph
 from ax.analysis.plotly.cross_validation import CrossValidationPlot
+from ax.analysis.plotly.metric_r2 import create_metric_r2_analysis_card
 from ax.analysis.utils import validate_experiment
 from ax.core.analysis_card import AnalysisCardGroup
 from ax.core.experiment import Experiment
@@ -76,17 +77,18 @@ def compute(
             generation_strategy_name=generation_strategy.name
         )
 
-        cross_validation_plots = (
-            [
-                CrossValidationPlot(metric_names=metric_names).compute_or_error_card(
-                    experiment=experiment,
-                    generation_strategy=generation_strategy,
-                    adapter=adapter,
-                )
-            ]
-            if not is_bandit
-            else []
-        )
+        cross_validation_plots = []
+        metric_r2_card = []
+        if not is_bandit:
+            cv_analysis = CrossValidationPlot(metric_names=metric_names)
+            cv_card = cv_analysis.compute_or_error_card(
+                experiment=experiment,
+                generation_strategy=generation_strategy,
+                adapter=adapter,
+            )
+            cross_validation_plots = [cv_card]
+            if cv_analysis._r2s:
+                metric_r2_card = [create_metric_r2_analysis_card(r2s=cv_analysis._r2s)]
 
         generation_strategy_graph = (
             [
@@ -103,5 +105,9 @@ def compute(
         return self._create_analysis_card_group(
             title=DIAGNOSTICS_CARDGROUP_TITLE,
             subtitle=DIAGNOSTICS_CARDGROUP_SUBTITLE,
-            children=[*cross_validation_plots, *generation_strategy_graph],
+            children=[
+                *cross_validation_plots,
+                *metric_r2_card,
+                *generation_strategy_graph,
+            ],
         )
diff --git a/ax/analysis/plotly/cross_validation.py b/ax/analysis/plotly/cross_validation.py
@@ -106,6 +106,7 @@ def __init__(
         self.untransform = untransform
         self.trial_index = trial_index
         self.labels: dict[str, str] = {**labels} if labels is not None else {}
+        self._r2s: dict[str, float] = {}
 
     @override
     def validate_applicable_state(
@@ -144,6 +145,7 @@ def compute(
             relevant_adapter._experiment.signature_to_metric[signature].name
             for signature in relevant_adapter._metric_signatures
         ]
+        self._r2s = {}
         for metric_name in self.metric_names or relevant_adapter_metric_names:
             df = _prepare_data(
                 metric_name=metric_name, cv_results=cv_results, adapter=relevant_adapter
@@ -162,6 +164,7 @@ def compute(
                 y_obs=df["observed"].to_numpy(),
                 y_pred=df["predicted"].to_numpy(),
             )
+            self._r2s[metric_title] = r_squared
 
             # Define the cross-validation description based on the number of folds
             cv_description = (
diff --git a/ax/analysis/plotly/metric_r2.py b/ax/analysis/plotly/metric_r2.py
@@ -0,0 +1,67 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+#
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+
+# pyre-strict
+
+import pandas as pd
+from ax.analysis.healthcheck.predictable_metrics import DEFAULT_MODEL_FIT_THRESHOLD
+from ax.analysis.plotly.plotly_analysis import PlotlyAnalysisCard
+from plotly import graph_objects as go, io as pio
+
+
+class MetricR2AnalysisCard(PlotlyAnalysisCard):
+    """PlotlyAnalysisCard subtype for the per-metric R² summary table. Adds no
+    behavior of its own; instances are built by create_metric_r2_analysis_card."""
+
+
+def create_metric_r2_analysis_card(
+    r2s: dict[str, float],
+    threshold: float = DEFAULT_MODEL_FIT_THRESHOLD,
+) -> MetricR2AnalysisCard:
+    """Build a two-column (metric name, R²) table card from per-metric R² values.
+
+    Args:
+        r2s: Maps metric name to R² value; one table row per entry, in dict order.
+        threshold: Rows with unrounded R² >= threshold are filled green, others
+            white. Defaults to DEFAULT_MODEL_FIT_THRESHOLD.
+
+    Returns:
+        A MetricR2AnalysisCard; table cells show R² to 2 decimals, ``df`` is raw.
+    """
+    metric_names = list(r2s.keys())
+    r2_values = [f"{v:.2f}" for v in r2s.values()]
+
+    fill_colors = [
+        "rgba(0, 200, 0, 0.15)" if r2 >= threshold else "white" for r2 in r2s.values()
+    ]
+
+    fig = go.Figure(
+        data=[
+            go.Table(
+                columnwidth=[4, 1],
+                header={
+                    "values": ["Metric", "R\u00b2"],
+                    "align": "left",
+                },
+                cells={
+                    "values": [metric_names, r2_values],
+                    "align": "left",
+                    "fill_color": [fill_colors, fill_colors],
+                },
+            )
+        ]
+    )
+
+    return MetricR2AnalysisCard(
+        name="MetricR2Summary",
+        title="Summary of model fits",
+        subtitle=(
+            "R\u00b2 (coefficient of determination) measures how well the model"
+            " predicts each metric. Higher values indicate better model fit."
+            f" Metrics with R\u00b2 >= {threshold} are highlighted in green."
+        ),
+        df=pd.DataFrame({"Metric": metric_names, "R\u00b2": list(r2s.values())}),
+        blob=pio.to_json(fig),
+    )
diff --git a/ax/analysis/plotly/tests/test_cross_validation.py b/ax/analysis/plotly/tests/test_cross_validation.py
@@ -106,6 +106,10 @@ def test_compute(self, mock_r2: mock.Mock) -> None:
         )
         self.assertIsNotNone(card.blob)
 
+        # Assert that _r2s is populated after compute
+        self.assertIn("bar", analysis._r2s)
+        self.assertAlmostEqual(analysis._r2s["bar"], 0.85)
+
         # Assert that all arms are in the cross validation df
         # because trial index is not specified
         for t in self.client.experiment.trials.values():
diff --git a/ax/analysis/tests/test_diagnostics.py b/ax/analysis/tests/test_diagnostics.py
@@ -16,6 +16,7 @@
     DIAGNOSTICS_CARDGROUP_TITLE,
 )
 from ax.analysis.plotly.cross_validation import CrossValidationPlot
+from ax.analysis.plotly.metric_r2 import MetricR2AnalysisCard
 from ax.api.client import Client
 from ax.api.configs import RangeParameterConfig
 from ax.core.analysis_card import ErrorAnalysisCard
@@ -118,6 +119,13 @@ def test_compute(self) -> None:
         for card in card_group.flatten():
             self.assertNotIsInstance(card, ErrorAnalysisCard)
 
+        # Should have a MetricR2AnalysisCard with the expected title
+        r2_cards = [
+            c for c in card_group.flatten() if isinstance(c, MetricR2AnalysisCard)
+        ]
+        self.assertEqual(len(r2_cards), 1)
+        self.assertEqual(r2_cards[0].title, "Summary of model fits")
+
         # --- Verify metric_names via patching CrossValidationPlot ---
         original_cv_init: Callable[..., None] = CrossValidationPlot.__init__
 
@@ -163,6 +171,12 @@ def capturing_init(self: CrossValidationPlot, **kwargs: object) -> None:
         self.assertIn("CrossValidationPlot", child_names_no_gs)
         self.assertNotIn("GenerationStrategyGraph", child_names_no_gs)
 
+        # MetricR2AnalysisCard not present when CV errors (no adapter available)
+        r2_cards_no_gs = [
+            c for c in card_group_no_gs.flatten() if isinstance(c, MetricR2AnalysisCard)
+        ]
+        self.assertEqual(len(r2_cards_no_gs), 0)
+
     def test_compute_bandit(self) -> None:
         experiment = Experiment(
             name="bandit_test",
@@ -250,6 +264,12 @@ def test_compute_bandit(self) -> None:
         # Bandit experiment should NOT include CrossValidationPlot
         self.assertNotIn("CrossValidationPlot", child_names)
 
+        # Bandit experiment should NOT include MetricR2AnalysisCard
+        r2_cards = [
+            c for c in card_group.flatten() if isinstance(c, MetricR2AnalysisCard)
+        ]
+        self.assertEqual(len(r2_cards), 0)
+
         # GenerationStrategyGraph should still be included (GS is provided)
         self.assertIn("GenerationStrategyGraph", child_names)