Skip TransformToNewSQ for metrics with near-zero status quo mean (#5076)

ItsMrLin · meta-codesync[bot] · commit 446ddb8e8338 · 2026-03-19T18:18:42.000-07:00
Summary: Pull Request resolved: #5076. When `ExpressionDerivedMetric` is used as an objective in PTS experiments, its status quo value is naturally zero (0% change from itself). This caused `TransformToNewSQ.transform_experiment_data` to crash with a `ValueError` in `relativize()`, because the delta method divides by the control mean and is therefore undefined when that mean is zero. D96574758 handled missing SQ data (trials without any SQ) but not zero-valued SQ data (SQ exists but the metric value is zero). This diff adds guards in `TransformToNewSQ` to skip metrics where the status quo mean is near-zero, with a warning so users know the transform was skipped. The `relativize()` utility itself still raises on a near-zero control mean -- we only avoid calling it with such a control mean. The threshold is now the shared constant `MEAN_CONTROL_EPSILON` (1e-10) in `ax/utils/stats/math_utils.py`, so the guards and `relativize()` use the same cutoff. Two code paths are guarded: - `transform_experiment_data` (vectorized/DataFrame path): checks target SQ and source trial SQ means before calling `relativize()`. - `_get_rel_mean_sem` (per-observation path): same guard, needed for untransform symmetry so that predictions are not incorrectly un-transformed for metrics that were never transformed. In addition, `_convert_experiment_data` in `ax/adapter/torch.py` now skips, with a warning, any outcome that has no non-NaN observations left after filtering. Meta: this unblocks Ax experiment `ifu_rbvm_session_proxy_pts`. Reviewed By: Balandat. Differential Revision: D97357997. fbshipit-source-id: e41f2d57998cd42ca03ab1d43cadbe4615fb0be1
diff --git a/ax/adapter/torch.py b/ax/adapter/torch.py
@@ -460,6 +460,13 @@ def _convert_experiment_data(
             # Drop NaN columns from means & corresponding params.
             outcome_means = mean_and_params[outcome_col_name].to_numpy()
             to_keep = ~np.isnan(outcome_means)
+            if not np.any(to_keep):
+                logger.warning(
+                    f"Skipping outcome '{outcome}': no non-NaN observations "
+                    f"remain after filtering. This can happen when a metric "
+                    f"has data in only a subset of trials."
+                )
+                continue
             Y = torch.from_numpy(outcome_means[to_keep]).double().view(-1, 1)
             X = torch.from_numpy(params_np[to_keep]).double()
             sem = sems_df[outcome].to_numpy()[to_keep]
diff --git a/ax/adapter/transforms/tests/test_transform_to_new_sq.py b/ax/adapter/transforms/tests/test_transform_to_new_sq.py
@@ -6,6 +6,7 @@
 # pyre-strict
 
 
+import logging
 import unittest
 from copy import deepcopy
 from unittest import mock
@@ -427,3 +428,55 @@ def test_non_relativizable_trial_preserved(self) -> None:
                 transformed.observation_data.loc[0]["sem", "branin"],
             )
         )
+
+    def test_zero_sq_metric_skipped_with_warning(self) -> None:
+        """Metrics whose status quo mean is near-zero (e.g.,
+        ExpressionDerivedMetric that is already relativized) should be
+        skipped with a warning rather than crashing on division by zero.
+        """
+        sobol = get_sobol(search_space=self.exp.search_space)
+        for sq_val in (2.0, 3.0):
+            t = self.exp.new_batch_trial(
+                generator_run=sobol.gen(2), should_add_status_quo_arm=True
+            ).mark_completed(unsafe=True)
+            data = get_branin_data_batch(batch=t)
+            data.df.loc[(data.df["arm_name"] == "status_quo"), "mean"] = sq_val
+            self.exp.attach_data(data=data)
+        self._refresh_adapter()
+
+        experiment_data = extract_experiment_data(
+            experiment=self.exp, data_loader_config=DataLoaderConfig()
+        )
+
+        tf = TransformToNewSQ(
+            search_space=None,
+            adapter=self.adapter,
+            config={"target_trial_index": 2},
+        )
+
+        # Set the target trial's SQ mean to zero, simulating an
+        # ExpressionDerivedMetric whose SQ is naturally zero.
+        tf.status_quo_data_by_trial[2].means[0] = 0.0
+
+        with self.assertLogs(
+            "ax.adapter.transforms.transform_to_new_sq", level=logging.WARNING
+        ) as cm:
+            transformed_data = tf.transform_experiment_data(
+                experiment_data=deepcopy(experiment_data)
+            )
+
+        # Verify a warning was emitted for the skipped metric.
+        self.assertTrue(
+            any("near-zero" in msg for msg in cm.output),
+            f"Expected a near-zero warning, got: {cm.output}",
+        )
+
+        # Data for all non-target trials should be unchanged (no transform
+        # was applied for the metric with zero SQ).
+        for t_idx in (0, 1):
+            orig = experiment_data.observation_data.loc[t_idx]
+            orig_non_sq = orig[
+                orig.index.get_level_values("arm_name") != self.adapter.status_quo_name
+            ]
+            tf_data = transformed_data.observation_data.loc[t_idx]
+            assert_frame_equal(orig_non_sq, tf_data)
diff --git a/ax/adapter/transforms/transform_to_new_sq.py b/ax/adapter/transforms/transform_to_new_sq.py
@@ -8,6 +8,7 @@
 
 from __future__ import annotations
 
+import logging
 from collections.abc import Callable
 from typing import TYPE_CHECKING
 
@@ -21,9 +22,11 @@
 from ax.core.search_space import SearchSpace
 from ax.core.utils import get_target_trial_index
 from ax.generators.types import TConfig
-from ax.utils.stats.math_utils import relativize, unrelativize
+from ax.utils.stats.math_utils import MEAN_CONTROL_EPSILON, relativize, unrelativize
 from pyre_extensions import assert_is_instance, none_throws
 
+logger: logging.Logger = logging.getLogger(__name__)
+
 if TYPE_CHECKING:
     # import as module to make sphinx-autodoc-typehints happy
     from ax import adapter as adapter_module  # noqa F401
@@ -127,6 +130,19 @@ def transform_experiment_data(
             if metric not in target_sq_data.metric_signatures:
                 continue
 
+            # Check target SQ mean first -- if near-zero, relativization is
+            # undefined (unrelativization would collapse all values to zero).
+            target_j = get_metric_index(data=target_sq_data, metric_signature=metric)
+            target_mean_c = target_sq_data.means[target_j]
+            if np.abs(target_mean_c) < MEAN_CONTROL_EPSILON:
+                logger.warning(
+                    f"Skipping TransformToNewSQ for metric '{metric}': "
+                    f"target trial status quo mean is near-zero "
+                    f"({target_mean_c}). This can happen when the metric "
+                    f"is already relativized (e.g., ExpressionDerivedMetric)."
+                )
+                continue
+
             # Build per-row control arrays from each trial's SQ data.
             mean_c, sem_c = [], []
             for idx in trial_indices[transform_mask]:
@@ -135,18 +151,26 @@ def transform_experiment_data(
                 mean_c.append(sq_data.means[j])
                 sem_c.append(sq_data.covariance[j, j] ** 0.5)
 
+            mean_c_arr = np.array(mean_c)
+            if np.any(np.abs(mean_c_arr) < MEAN_CONTROL_EPSILON):
+                logger.warning(
+                    f"Skipping TransformToNewSQ for metric '{metric}': "
+                    f"one or more trial status quo means are near-zero. "
+                    f"This can happen when the metric is already relativized "
+                    f"(e.g., ExpressionDerivedMetric)."
+                )
+                continue
+
             means_rel, sems_rel = relativize(
                 means_t=observation_data.loc[transform_mask, ("mean", metric)],
                 sems_t=observation_data.loc[transform_mask, ("sem", metric)],
-                mean_c=np.array(mean_c),
+                mean_c=mean_c_arr,
                 sem_c=np.array(sem_c),
                 as_percent=False,
                 control_as_constant=self.control_as_constant,
             )
 
             # Unrelativize with respect to target trial's status quo.
-            target_j = get_metric_index(data=target_sq_data, metric_signature=metric)
-            target_mean_c = target_sq_data.means[target_j]
             abs_target_mean_c = np.abs(target_mean_c)
             observation_data.loc[transform_mask, ("mean", metric)] = (
                 means_rel * abs_target_mean_c + target_mean_c
@@ -232,6 +256,19 @@ def _get_rel_mean_sem(
         j = get_metric_index(data=target_status_quo_data, metric_signature=metric)
         target_mean_c = target_status_quo_data.means[j]
         abs_target_mean_c = np.abs(target_mean_c)
+        # Skip if control or target SQ mean is near-zero -- relativization
+        # is undefined (division by zero).  The guard here is needed for
+        # untransform symmetry: if transform_experiment_data skipped a
+        # metric, the untransform path must also skip it.
+        if abs_target_mean_c < MEAN_CONTROL_EPSILON or (
+            np.abs(mean_c) < MEAN_CONTROL_EPSILON
+        ):
+            logger.warning(
+                f"Skipping TransformToNewSQ for metric '{metric}': "
+                f"status quo mean is near-zero (target={target_mean_c}, "
+                f"control={mean_c})."
+            )
+            return means_t, sems_t
         if rel_op == unrelativize:
             means_t = (means_t - target_mean_c) / abs_target_mean_c
             sems_t = sems_t / abs_target_mean_c
diff --git a/ax/utils/stats/math_utils.py b/ax/utils/stats/math_utils.py
@@ -9,6 +9,10 @@
 import numpy as np
 import numpy.typing as npt
 
+# Minimum absolute value for a control mean to be considered non-zero
+# for relativization via the delta method.
+MEAN_CONTROL_EPSILON: float = 1e-10
+
 
 def relativize(
     means_t: npt.NDArray | list[float] | float,
@@ -83,8 +87,7 @@ def relativize(
 
     """
     # if mean_c is too small, bail
-    epsilon = 1e-10
-    if np.any(np.abs(mean_c) < epsilon):
+    if np.any(np.abs(mean_c) < MEAN_CONTROL_EPSILON):
         raise ValueError(
             "mean_control ({} +/- {}) is smaller than 1 in 10 billion, "
             "which is too small to reliably analyze ratios using the delta "