Include SQ arm in DerivedMetric output when relativize_inputs=True (#5029)

ItsMrLin · meta-codesync[bot] · commit 4c1ed837c4fc · 2026-03-15T11:56:52.000-07:00
Summary: Pull Request resolved: #5029. When `relativize_inputs=True`, the status quo (SQ) arm was previously excluded from the output on the grounds that its relativized values are trivially zero. That reasoning only holds for the inputs, not for the derived value: for non-linear expressions the result on zero inputs need not be zero (`exp(0) = 1`, not 0). Instead of skipping the SQ arm, construct a DataFrame with zero-valued inputs for all input metrics and let `_compute_derived_values` evaluate the expression on them. This produces correct SQ output for any expression (e.g., `a + b` evaluates to 0, `exp(a)` evaluates to 1). Reviewed By: Balandat. Differential Revision: D96558255. fbshipit-source-id: 7c8283609f459149a19c72473251715e442d8c28
diff --git a/ax/core/derived_metric.py b/ax/core/derived_metric.py
@@ -318,8 +318,9 @@ def _relativize_arm_data(
         properly transform both means and SEMs.
 
         When ``relativize_inputs`` is ``False``, returns ``arm_data``
-        unchanged.  When ``True``, the status quo arm is excluded from
-        the returned dict (its relativized values are zero by definition).
+        unchanged.  When ``True``, the status quo arm is included with
+        zero-valued inputs so the expression can be evaluated on it
+        (e.g., ``exp(0)=1``).
         """
         if not self._relativize_inputs:
             return arm_data
@@ -342,8 +343,24 @@ def _relativize_arm_data(
         # different SQ metric values (non-stationarity).
         relativized: dict[str, pd.DataFrame] = {}
         for arm_name, arm_df in arm_data.items():
-            # Skip the SQ arm itself — its relativized values are zero.
+            # SQ relativized against itself is trivially zero for all inputs.
+            # Include it so _compute_derived_values can evaluate the expression
+            # on zeros (e.g., exp(0)=1, a+b=0).
             if arm_name == sq_name:
+                sq_rel_rows: list[dict[str, Any]] = []
+                status_quo_trial_index = int(arm_df["trial_index"].iloc[0])
+                for metric_name in self._input_metric_names:
+                    sq_rel_rows.append(
+                        {
+                            "trial_index": status_quo_trial_index,
+                            "arm_name": sq_name,
+                            "metric_name": metric_name,
+                            "metric_signature": metric_name,
+                            "mean": 0.0,
+                            "sem": 0.0,
+                        }
+                    )
+                relativized[sq_name] = pd.DataFrame(sq_rel_rows)
                 continue
 
             # Determine this arm's source trial_index from its data.
@@ -467,8 +484,8 @@ def fetch_trial_data(self, trial: BaseTrial, **kwargs: Any) -> MetricFetchResult
         if isinstance(arm_data_result, MetricFetchE):
             return Err(arm_data_result)
 
-        # After relativization, arm_data may be empty (e.g., a SQ-only trial
-        # where all arms were excluded).  Return empty data, not an error.
+        # After relativization, arm_data may be empty (e.g., a trial with
+        # no arms).  Return empty data, not an error.
         if not arm_data_result:
             return Ok(value=Data())
 
@@ -651,7 +668,8 @@ def _compute_derived_values(
         """Evaluate the expression for each arm using pre-collected data.
 
         When ``relativize_inputs`` is ``True``, the base class has already
-        relativized the ``mean`` values and excluded the status quo arm.
+        relativized the ``mean`` values.  The status quo arm is included
+        with zero-valued inputs.
         """
         result_rows: list[dict[str, Any]] = []
 
diff --git a/ax/core/tests/test_derived_metric.py b/ax/core/tests/test_derived_metric.py
@@ -265,11 +265,15 @@ def test_relativize_arm_data(self) -> None:
         result = metric.fetch_trial_data(exp.trials[0])
         self.assertIsInstance(result, Ok)
         df = none_throws(result.ok).df
-        # SQ arm should be excluded from output.
-        self.assertEqual(set(df["arm_name"].unique()), {"arm1"})
+        # SQ arm should be included with zero-valued inputs (sum=0).
+        self.assertEqual(set(df["arm_name"].unique()), {"sq", "arm1"})
+        sq_row = df[df["arm_name"] == "sq"]
+        # SQ: inputs are zero after relativization, sum(0,0) = 0.
+        self.assertAlmostEqual(sq_row["mean"].iloc[0], 0.0)
+        arm1_row = df[df["arm_name"] == "arm1"]
         # arm1 relativized (as_percent=True):
         # a=(15-10)/10=50%, b=(30-20)/20=50%; sum=100.0
-        self.assertAlmostEqual(df["mean"].iloc[0], 100.0)
+        self.assertAlmostEqual(arm1_row["mean"].iloc[0], 100.0)
 
         with self.subTest("no_status_quo"):
             exp_no_sq = Experiment(name="no_sq", search_space=get_branin_search_space())
@@ -698,12 +702,13 @@ def test_expression_evaluation_errors(self) -> None:
     # ------------------------------------------------------------------
 
     def test_relativize_inputs(self) -> None:
-        """Relativized fetch: correct computation, SQ excluded, multi-arm.
+        """Relativized fetch: correct computation, SQ included, multi-arm.
         Also verifies that relativize_inputs=False (default) includes SQ
         and uses raw values."""
         # SQ: a=10, b=4.
-        # arm_1: a=15, b=8 → a_rel=0.5, b_rel=1.0 → a/b = 0.5
-        # arm_2: a=20, b=6 → a_rel=1.0, b_rel=0.5 → a/b = 2.0
+        # arm_1: a=15, b=8 → a_rel=50%, b_rel=100% → a+b = 150
+        # arm_2: a=20, b=6 → a_rel=100%, b_rel=50% → a+b = 150
+        # SQ: a_rel=0, b_rel=0 → a+b = 0
         exp = self._batch_experiment_with_sq(
             sq_values={"a": 10.0, "b": 4.0},
             arm_values={
@@ -712,20 +717,47 @@ def test_relativize_inputs(self) -> None:
             },
         )
         metric = ExpressionDerivedMetric(
-            name="ratio_rel",
+            name="sum_rel",
             input_metric_names=["a", "b"],
-            expression_str="a / b",
+            expression_str="a + b",
             relativize_inputs=True,
         )
         result = metric.fetch_trial_data(exp.trials[0])
         self.assertIsInstance(result, Ok)
         df = none_throws(result.ok).df.sort_values("arm_name").reset_index(drop=True)
-        self.assertEqual(len(df), 2)
-        self.assertNotIn("status_quo", df["arm_name"].values)
-        self.assertAlmostEqual(df.loc[0, "mean"], 0.5, places=10)
-        self.assertAlmostEqual(df.loc[1, "mean"], 2.0, places=10)
+        # SQ is included: 3 rows (arm_1, arm_2, status_quo).
+        self.assertEqual(len(df), 3)
+        self.assertIn("status_quo", df["arm_name"].values)
+        arm1_row = df[df["arm_name"] == "arm_1"]
+        arm2_row = df[df["arm_name"] == "arm_2"]
+        sq_row = df[df["arm_name"] == "status_quo"]
+        self.assertAlmostEqual(arm1_row["mean"].iloc[0], 150.0, places=10)
+        self.assertAlmostEqual(arm2_row["mean"].iloc[0], 150.0, places=10)
+        # SQ: zero-valued inputs → a+b = 0.
+        self.assertAlmostEqual(sq_row["mean"].iloc[0], 0.0, places=10)
         self.assertTrue(df["sem"].isna().all())
 
+        with self.subTest("sq_evaluates_expression_on_zeros"):
+            # exp(0) = 1, verifying the expression is evaluated (not
+            # hardcoded to 0) on the SQ arm's zero-valued inputs.
+            exp2 = self._batch_experiment_with_sq(
+                sq_values={"a": 10.0},
+                arm_values={"arm_1": {"a": 15.0}},
+            )
+            metric2 = ExpressionDerivedMetric(
+                name="exp_a",
+                input_metric_names=["a"],
+                expression_str="exp(a)",
+                relativize_inputs=True,
+            )
+            result2 = metric2.fetch_trial_data(exp2.trials[0])
+            self.assertIsInstance(result2, Ok)
+            df2 = none_throws(result2.ok).df
+            sq_row2 = df2[df2["arm_name"] == "status_quo"]
+            self.assertEqual(len(sq_row2), 1)
+            # exp(0) = 1.0
+            self.assertAlmostEqual(sq_row2["mean"].iloc[0], 1.0, places=10)
+
         with self.subTest("not_applied_by_default"):
             exp = self._batch_experiment_with_sq(
                 sq_values={"a": 10.0, "b": 5.0},