Skip to content

Commit 72a3a58

Browse files
sdaulton and meta-codesync[bot]
authored and committed
Fix incorrect hypervolume reference point for minimization objectives with inferred thresholds (#4970)
Summary: Pull Request resolved: #4970 `get_hypervolume_trace_of_outcomes_multi_objective` negates metric values for minimization objectives to convert them to maximization convention (line 784). However, when inferring objective thresholds from data, the code branched on `obj.minimize` to pick `max()` vs `min()` — not accounting for the already-negated data. This caused the inferred reference point to be the *best* observed value (not the worst), placing it above all data points and yielding a hypervolume of 0 for every trial. The fix separates the two cases: - **Explicit thresholds**: bound is in the original metric space, so negate for minimization (unchanged). - **Inferred thresholds**: data is already in maximization convention, so the worst value is simply `min()` — no further sign flip needed. Additionally, this diff changes how the reference point is inferred in `get_hypervolume_trace_of_outcomes_multi_objective`. Previously, the reference point was set to the worst observed objective values. Now, we use `infer_reference_point` from BoTorch to compute a scaled nadir point from the Pareto frontier of feasible observations. This approach scales the nadir by a factor (default 0.1) to ensure the reference point lies slightly below (worse than) the Pareto front, providing a more robust and theoretically grounded reference point for hypervolume computation. This bug affected `UtilityProgressionAnalysis` (and any caller of `get_trace`) for MOO experiments with minimization objectives that lack explicit objective thresholds. Reviewed By: mpolson64 Differential Revision: D94783465 fbshipit-source-id: c06209e10afadc562fb221dcae3a19019f7d4bdc
1 parent 1145c41 commit 72a3a58

File tree

3 files changed

+115
-14
lines changed

3 files changed

+115
-14
lines changed

ax/service/tests/test_best_point.py

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -90,11 +90,19 @@ def test_get_trace(self) -> None:
9090
)
9191
self.assertEqual(get_trace(exp), [1, 1, 2, 9, 11, 11])
9292

93-
# W/o ObjectiveThresholds (infering ObjectiveThresholds from nadir point)
93+
# W/o ObjectiveThresholds (inferring ObjectiveThresholds from scaled nadir)
9494
assert_is_instance(
9595
exp.optimization_config, MultiObjectiveOptimizationConfig
9696
).objective_thresholds = []
97-
self.assertEqual(get_trace(exp), [0.0, 0.0, 2.0, 8.0, 11.0, 11.0])
97+
trace = get_trace(exp)
98+
# With inferred thresholds via scaled nadir, check trace properties:
99+
# - All values should be non-negative
100+
self.assertTrue(all(v >= 0.0 for v in trace))
101+
# - Trace should be non-decreasing (cumulative best)
102+
for i in range(1, len(trace)):
103+
self.assertGreaterEqual(trace[i], trace[i - 1])
104+
# - Final value should be positive (non-trivial HV)
105+
self.assertGreater(trace[-1], 0.0)
98106

99107
# Multi-objective w/ constraints.
100108
exp = get_experiment_with_observations(

ax/service/tests/test_best_point_utils.py

Lines changed: 44 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -168,15 +168,50 @@ def test_get_hypervolume_trace_of_outcomes_multi_objective(self) -> None:
168168
optimization_config=optimization_config,
169169
use_cumulative_hv=True,
170170
)
171-
self.assertEqual(hvs, [0.0, 2.0, 2.0, 3.0])
171+
# Inferred ref point is (1.9, 1.9) from Pareto front {(2,3),(3,2)}.
172+
np.testing.assert_allclose(hvs, [0.0, 0.11, 0.11, 0.21], atol=1e-10)
172173

173174
with self.subTest("Non-cumulative HV"):
174175
hvs = get_hypervolume_trace_of_outcomes_multi_objective(
175176
df_wide=df_wide,
176177
optimization_config=optimization_config,
177178
use_cumulative_hv=False,
178179
)
179-
self.assertEqual(hvs, [0.0, 2.0, 0.0, 2.0])
180+
np.testing.assert_allclose(hvs, [0.0, 0.11, 0.0, 0.11], atol=1e-10)
181+
182+
def test_get_hypervolume_trace_minimization_inferred_thresholds(self) -> None:
183+
"""Test that inferred thresholds work correctly with minimization
184+
objectives. Regression test for a bug where the reference point was
185+
computed from already-negated data but treated as un-negated, causing
186+
the reference point to dominate all observations (yielding 0 HV).
187+
"""
188+
objective = MultiObjective(
189+
objectives=[
190+
Objective(metric=Metric("m1"), minimize=True),
191+
Objective(metric=Metric("m2"), minimize=True),
192+
],
193+
)
194+
optimization_config = MultiObjectiveOptimizationConfig(
195+
objective=objective,
196+
)
197+
df_wide = pd.DataFrame.from_records(
198+
[
199+
{"m1": 3.0, "m2": 1.0, "feasible": True},
200+
{"m1": 1.0, "m2": 3.0, "feasible": True},
201+
{"m1": 7.0, "m2": 7.0, "feasible": True},
202+
{"m1": 2.0, "m2": 2.0, "feasible": True},
203+
]
204+
)
205+
hvs = get_hypervolume_trace_of_outcomes_multi_objective(
206+
df_wide=df_wide.copy(),
207+
optimization_config=optimization_config,
208+
use_cumulative_hv=True,
209+
)
210+
# All HVs should be positive (before the fix, they were all 0.0)
211+
self.assertGreater(hvs[-1], 0.0)
212+
# The trace should be non-decreasing (cumulative best)
213+
for i in range(1, len(hvs)):
214+
self.assertGreaterEqual(hvs[i], hvs[i - 1])
180215

181216
def test_get_trace_by_arm_pull_from_data(self) -> None:
182217
objective = Objective(metric=Metric("m1"), minimize=False)
@@ -318,22 +353,26 @@ def test_get_trace_by_arm_pull_from_data(self) -> None:
318353
],
319354
),
320355
)
321-
# reference point inferred to be [1, 0]
356+
# reference point inferred via infer_reference_point on Pareto front
322357
with self.subTest("Multi-objective, cumulative"):
323358
result = get_trace_by_arm_pull_from_data(
324359
df=df, optimization_config=moo_opt_config, use_cumulative_best=True
325360
)
326361
self.assertEqual(len(result), 3)
327362
self.assertEqual(set(result.columns), {"trial_index", "arm_name", "value"})
328-
self.assertEqual(result["value"].tolist(), [0.0, 0.0, 2.0])
363+
np.testing.assert_allclose(
364+
result["value"].tolist(), [0.22, 0.22, 0.42], atol=1e-10
365+
)
329366

330367
with self.subTest("Multi-objective, non-cumulative"):
331368
result = get_trace_by_arm_pull_from_data(
332369
df=df, optimization_config=moo_opt_config, use_cumulative_best=False
333370
)
334371
self.assertEqual(len(result), 3)
335372
self.assertEqual(set(result.columns), {"trial_index", "arm_name", "value"})
336-
self.assertEqual(result["value"].tolist(), [0.0, 0.0, 2.0])
373+
np.testing.assert_allclose(
374+
result["value"].tolist(), [0.22, 0.0, 0.22], atol=1e-10
375+
)
337376

338377
@mock_botorch_optimize
339378
def test_best_from_model_prediction(self) -> None:

ax/service/utils/best_point.py

Lines changed: 61 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,7 @@
5353
from ax.utils.preference.preference_utils import get_preference_adapter
5454
from botorch.utils.multi_objective.box_decompositions import DominatedPartitioning
5555
from botorch.utils.multi_objective.hypervolume import infer_reference_point
56+
from botorch.utils.multi_objective.pareto import is_non_dominated
5657
from numpy.typing import NDArray
5758
from pyre_extensions import assert_is_instance, none_throws
5859

@@ -738,7 +739,10 @@ def get_hypervolume_trace_of_outcomes_multi_objective(
738739
df_wide: Dataframe with columns ["feasible"] + relevant
739740
metrics. This can come from reshaping the data that comes from `Data.df`.
740741
optimization_config: A multi-objective optimization config with a
741-
`MultiObjective` (not a `ScalarizedObjective`).
742+
`MultiObjective` (not a `ScalarizedObjective`). When objective
743+
thresholds are not provided, they are inferred using
744+
``infer_reference_point`` on the Pareto frontier of the feasible
745+
observations.
742746
use_cumulative_hv: If True, the hypervolume returned is the cumulative
743747
hypervolume of the points in each row. Otherwise, this is the
744748
hypervolume of each point.
@@ -754,8 +758,21 @@ def get_hypervolume_trace_of_outcomes_multi_objective(
754758
... Objective(metric=Metric(name="m2"), minimize=False),
755759
... ]
756760
... ),
761+
... objective_thresholds=[
762+
... ObjectiveThreshold(
763+
... metric=Metric(name="m1"),
764+
... bound=0.0,
765+
... relative=False,
766+
... op=ComparisonOp.GEQ,
767+
... ),
768+
... ObjectiveThreshold(
769+
... metric=Metric(name="m2"),
770+
... bound=0.0,
771+
... relative=False,
772+
... op=ComparisonOp.GEQ,
773+
... ),
774+
... ],
757775
... )
758-
>>> # Objective threshols will be inferred to be zero
759776
>>> df_wide = pd.DataFrame.from_records(
760777
... [
761778
... {"m1": 0.0, "m2": 0.0, "feasible": True},
@@ -788,6 +805,8 @@ def get_hypervolume_trace_of_outcomes_multi_objective(
788805
threshold.metric.name: threshold
789806
for threshold in optimization_config.objective_thresholds
790807
}
808+
# First pass: collect explicit thresholds, mark missing ones with NaN.
809+
needs_inference = False
791810
for obj in objective.objectives:
792811
metric_name = obj.metric.name
793812
if metric_name in objective_thresholds_dict:
@@ -798,14 +817,49 @@ def get_hypervolume_trace_of_outcomes_multi_objective(
798817
"`Derelativize` the optimization config, or use "
799818
"`get_trace`."
800819
)
820+
# Explicit thresholds are in the original metric space, so negate
821+
# for minimization objectives to match the negated data.
801822
bound = threshold.bound
823+
objective_thresholds.append(-bound if obj.minimize else bound)
802824
else:
803-
metric_vals = df_wide[metric_name]
804-
bound = metric_vals.max() if obj.minimize else metric_vals.min()
805-
806-
objective_thresholds.append(-bound if obj.minimize else bound)
825+
needs_inference = True
826+
objective_thresholds.append(float("nan"))
827+
828+
if needs_inference:
829+
# Infer missing thresholds using infer_reference_point on the
830+
# observed Pareto frontier (data is already in maximization
831+
# convention after negating minimization objectives above).
832+
feasible_mask = df_wide["feasible"].to_numpy()
833+
Y_feasible = torch.from_numpy(
834+
df_wide.loc[feasible_mask, objective.metric_names].to_numpy().copy()
835+
).to(torch.double)
836+
if Y_feasible.shape[0] > 0:
837+
pareto_Y = Y_feasible[is_non_dominated(Y_feasible)]
838+
else:
839+
# No feasible points -- use all data as fallback.
840+
Y_all = torch.from_numpy(
841+
df_wide[objective.metric_names].to_numpy().copy()
842+
).to(torch.double)
843+
pareto_Y = Y_all[is_non_dominated(Y_all)]
844+
845+
max_ref_point = torch.tensor(objective_thresholds, dtype=torch.double)
846+
has_any_explicit = not max_ref_point.isnan().all()
847+
848+
inferred = infer_reference_point(
849+
pareto_Y=pareto_Y,
850+
max_ref_point=max_ref_point if has_any_explicit else None,
851+
scale=0.1,
852+
)
807853

808-
objective_thresholds = torch.tensor(objective_thresholds, dtype=torch.double)
854+
if has_any_explicit:
855+
# Replace NaN entries with inferred values.
856+
objective_thresholds = torch.where(
857+
max_ref_point.isnan(), inferred, max_ref_point
858+
)
859+
else:
860+
objective_thresholds = inferred
861+
else:
862+
objective_thresholds = torch.tensor(objective_thresholds, dtype=torch.double)
809863

810864
metrics_tensor = torch.from_numpy(df_wide[objective.metric_names].to_numpy().copy())
811865
return _compute_hv_trace(

0 commit comments

Comments (0)