Fix bug in optimize_acqf_mixed_alternating that may produce candidates that have invalid values (meta-pytorch#3212)

David Eriksson · meta-codesync[bot] · commit ad688792f5a5 · 2026-03-04T10:08:10.000-08:00
Summary: Pull Request resolved: meta-pytorch#3212. When parameter constraints are placed on discrete parameters, `optimize_acqf_mixed_alternating` may produce candidates that have invalid values because of an unintended interaction between `project_to_feasible_space_via_slsqp` and the `post_processing_func`. For example, `project_to_feasible_space_via_slsqp` may move a discrete parameter during the continuous step, which causes it to later be rounded to an invalid value by the `post_processing_func`. The solution has two parts: 1. Fix all discrete parameters during the continuous step so they aren't modified by `project_to_feasible_space_via_slsqp`. 2. Modify `_setup_continuous_relaxation` so that it does not apply continuous relaxation to discrete parameters that are part of a parameter constraint (regardless of their cardinality). Reviewed By: saitcakmak, ltiao. Differential Revision: D94963154. fbshipit-source-id: 30b952f93a804736a90d287ececc6c3c57e8ba89
diff --git a/botorch/optim/optimize.py b/botorch/optim/optimize.py
@@ -535,6 +535,7 @@ def _optimize_batch_candidates() -> tuple[Tensor, Tensor, list[Warning]]:
             bounds=opt_inputs.bounds,
             equality_constraints=equality_constraints,
             inequality_constraints=inequality_constraints,
+            fixed_features=opt_inputs.fixed_features,
         )
         if opt_inputs.post_processing_func is not None:
             projected_candidates = opt_inputs.post_processing_func(projected_candidates)
diff --git a/botorch/optim/optimize_mixed.py b/botorch/optim/optimize_mixed.py
@@ -121,17 +121,32 @@ def _setup_continuous_relaxation(
     discrete_dims: dict[int, list[float]],
     max_discrete_values: int,
     post_processing_func: Callable[[Tensor], Tensor] | None,
+    inequality_constraints: list[tuple[Tensor, Tensor, float]] | None = None,
+    equality_constraints: list[tuple[Tensor, Tensor, float]] | None = None,
 ) -> tuple[list[int], Callable[[Tensor], Tensor] | None]:
     r"""Update ``discrete_dims`` and ``post_processing_func`` to use
     continuous relaxation for discrete dimensions that have more than
     ``max_discrete_values`` values. These dimensions are removed from
     ``discrete_dims`` and ``post_processing_func`` is updated to round
     them to the nearest integer.
+
+    Dimensions that participate in constraints are NOT relaxed, as rounding
+    after projection could violate those constraints.
     """
 
+    # Identify dimensions involved in constraints
+    constrained_dims: set[int] = set()
+    for constraints in [inequality_constraints, equality_constraints]:
+        if constraints is not None:
+            for indices, _, _ in constraints:
+                constrained_dims.update(indices.tolist())
+
     dims_to_relax, dims_to_keep = {}, {}
     for index, values in discrete_dims.items():
-        if len(values) > max_discrete_values:
+        # Don't relax dimensions that participate in constraints
+        if index in constrained_dims:
+            dims_to_keep[index] = values
+        elif len(values) > max_discrete_values:
             dims_to_relax[index] = values
         else:
             dims_to_keep[index] = values
@@ -839,8 +854,7 @@ def continuous_step(
             This function utilizes ``acq_function``, ``bounds``, ``options``,
             ``fixed_features`` and constraints from ``opt_inputs``.
             ``opt_inputs.return_best_only`` should be ``False``.
-        discrete_dims: A dictionary mapping indices of discrete dimensions
-            to a list of allowed values for that dimension.
+        discrete_dims: A tensor of indices corresponding to discrete dimensions.
         cat_dims: A tensor of indices corresponding to categorical parameters.
         current_x: Starting point. A tensor of shape ``b x d``.
 
@@ -1032,6 +1046,8 @@ def optimize_acqf_mixed_alternating(
             options.get("max_discrete_values", MAX_DISCRETE_VALUES), int
         ),
         post_processing_func=post_processing_func,
+        inequality_constraints=inequality_constraints,
+        equality_constraints=equality_constraints,
     )
 
     opt_inputs = OptimizeAcqfInputs(
diff --git a/botorch/optim/parameter_constraints.py b/botorch/optim/parameter_constraints.py
@@ -17,7 +17,7 @@
 import numpy.typing as npt
 import torch
 from botorch.exceptions.errors import CandidateGenerationError, UnsupportedError
-from botorch.optim.utils import columnwise_clamp
+from botorch.optim.utils import columnwise_clamp, fix_features as apply_fix_features
 from scipy.optimize import Bounds, minimize
 from torch import Tensor
 
@@ -724,6 +724,7 @@ def project_to_feasible_space_via_slsqp(
     bounds: Tensor,
     inequality_constraints: list[tuple[Tensor, Tensor, float]] | None = None,
     equality_constraints: list[tuple[Tensor, Tensor, float]] | None = None,
+    fixed_features: dict[int, float | Tensor] | None = None,
 ) -> Tensor:
     """Project X onto the feasible space by solving a quadratic program.
 
@@ -743,15 +744,38 @@ def project_to_feasible_space_via_slsqp(
             ``coefficients`` should be torch tensors. See the docstring of
             ``make_scipy_linear_constraints`` for an example.
         equality_constraints: A list of tuples (indices, coefficients, rhs).
+        fixed_features: A dictionary mapping feature indices to their fixed values.
+            These dimensions will not be modified during projection. Values can be
+            scalars (applied to all elements) or 1D tensors matching the batch size
+            of X (for per-element fixed values).
 
     Returns:
-        A ``(batch_shape x) n x d``-dim tensor of  projected values.
+        A ``(batch_shape x) n x d``-dim tensor of projected values.
     """
     if inequality_constraints is None and equality_constraints is None:
         return X
-    bounds_scipy = make_scipy_bounds(
-        X=X, lower_bounds=bounds[0], upper_bounds=bounds[1]
-    )
+
+    d = X.shape[-1]
+    lb = _arrayify(bounds[0].expand_as(X)).flatten()
+    ub = _arrayify(bounds[1].expand_as(X)).flatten()
+
+    # If there are fixed features, constrain those dimensions by setting their
+    # bounds to equal the current value. This prevents the optimizer from
+    # modifying them during projection. We use fix_features to apply the fixed
+    # values to X, then extract the values for setting the bounds.
+    if fixed_features:
+        X_fixed = apply_fix_features(X, fixed_features, replace_current_value=True)
+        # Set bounds for fixed dimensions to match the fixed values
+        X_fixed_flat = _arrayify(X_fixed).flatten()
+        for idx in fixed_features.keys():
+            # For each row in the flattened structure, set bounds at dimension idx
+            n_rows = X.numel() // d
+            for i in range(n_rows):
+                flat_idx = i * d + idx
+                lb[flat_idx] = X_fixed_flat[flat_idx]
+                ub[flat_idx] = X_fixed_flat[flat_idx]
+
+    bounds_scipy = Bounds(lb=lb, ub=ub, keep_feasible=True)
     constraints = make_scipy_linear_constraints(
         shapeX=X.shape,
         inequality_constraints=inequality_constraints,
@@ -789,6 +813,6 @@ def grad_objective(x: np.ndarray):
     )
 
     if not result.success:
-        raise RuntimeError(f"Optimization failed: {result.message}")
+        raise CandidateGenerationError(f"Optimization failed: {result.message}")
 
     return torch.from_numpy(result.x).to(X).view(X.shape)
diff --git a/test/optim/test_optimize_mixed.py b/test/optim/test_optimize_mixed.py
@@ -1440,6 +1440,8 @@ def test_optimize_acqf_mixed_continuous_relaxation(self) -> None:
                 discrete_dims=discrete_dims,
                 max_discrete_values=max_discrete_values or MAX_DISCRETE_VALUES,
                 post_processing_func=post_processing_func,
+                inequality_constraints=None,
+                equality_constraints=None,
             )
             discrete_call_args = wrapped_discrete.call_args.kwargs
             expected_dims = [0, 4] if max_discrete_values is None else [0]
@@ -1516,3 +1518,113 @@ def org_post_proc_func(X: Tensor) -> Tensor:
         # Check that generated points are rounded.
         self.assertEqual(X.shape, torch.Size([4, train_X.shape[-1]]))
         self.assertAllClose(X[..., all_integer_dims], X[..., all_integer_dims].round())
+
+    def test_setup_continuous_relaxation_excludes_constrained_dims(self) -> None:
+        """Test that _setup_continuous_relaxation keeps constrained discrete dims."""
+        for dtype in (torch.float, torch.double):
+            # Setup: 3 discrete dimensions
+            # - Dim 0: Low cardinality (2 values) - kept regardless
+            # - Dim 1: High cardinality (50 values), participates in constraint - kept
+            # - Dim 2: High cardinality (50 values), not constrained - relaxed
+            discrete_dims: dict[int, list[float]] = {
+                0: [0.0, 1.0],  # Low cardinality - should be kept
+                1: list(range(50)),  # High cardinality, constrained - should be kept
+                2: list(range(50)),  # High cardinality, not constrained - relaxed
+            }
+            max_discrete_values = 20
+            # Constraint on dim 1: x[1] >= 10
+            inequality_constraints = [
+                (
+                    torch.tensor([1], dtype=torch.long, device=self.device),
+                    torch.tensor([1.0], dtype=dtype, device=self.device),
+                    10.0,
+                )
+            ]
+            # Execute: call _setup_continuous_relaxation
+            dims_kept, post_processing_func = _setup_continuous_relaxation(
+                discrete_dims=discrete_dims,
+                max_discrete_values=max_discrete_values,
+                post_processing_func=None,
+                inequality_constraints=inequality_constraints,
+            )
+            # Assert: dims 0 and 1 are kept (low cardinality and constrained)
+            self.assertIn(0, dims_kept)
+            self.assertIn(1, dims_kept)
+            # Assert: dim 2 is NOT in dims_kept (relaxed)
+            self.assertNotIn(2, dims_kept)
+            # Assert: post_processing_func is not None since dim 2 was relaxed
+            self.assertIsNotNone(post_processing_func)
+            # Assert: post_processing_func rounds dim 2 but not dims 0 or 1
+            X = torch.tensor(
+                [0.4, 25.3, 30.7],  # dim 0, 1, 2 with non-integer values
+                dtype=dtype,
+                device=self.device,
+            )
+            X_processed = post_processing_func(X)
+            # Dim 0 and 1 should remain unchanged (not rounded by this func)
+            self.assertAllClose(
+                X_processed[0], torch.tensor(0.4, dtype=dtype, device=self.device)
+            )
+            self.assertAllClose(
+                X_processed[1], torch.tensor(25.3, dtype=dtype, device=self.device)
+            )
+            # Dim 2 should be rounded to nearest valid value
+            self.assertAllClose(
+                X_processed[2], torch.tensor(31.0, dtype=dtype, device=self.device)
+            )
+
+    def test_optimize_acqf_mixed_alternating_constrained_discrete_dims(self) -> None:
+        """Test full workflow produces valid discrete values with constrained dims.
+
+        Uses non-contiguous choices [8, 16, 24, 32, 40, 48] to exercise the failure
+        mode where rounding to nearest integer (e.g. 47) differs from rounding to
+        nearest valid choice (48).
+        """
+        for dtype in (torch.float, torch.double):
+            # Setup: GP model with posterior mean as acquisition function
+            d = 2  # 1 continuous + 1 discrete dimension
+            train_X = torch.rand(5, d, dtype=dtype, device=self.device)
+            # Non-contiguous discrete values: multiples of 8 from 8 to 48
+            valid_choices = [8.0, 16.0, 24.0, 32.0, 40.0, 48.0]
+            train_X[:, 1] = torch.tensor(
+                [valid_choices[i % len(valid_choices)] for i in range(5)],
+                dtype=dtype,
+                device=self.device,
+            )
+            train_Y = train_X.sum(dim=-1, keepdim=True)
+            model = SingleTaskGP(train_X, train_Y)
+            acqf = PosteriorMean(model=model)
+            # Define bounds: [0, 1] for continuous, [8, 48] for discrete
+            bounds = torch.tensor(
+                [[0.0, 8.0], [1.0, 48.0]], dtype=dtype, device=self.device
+            )
+            # Non-contiguous discrete dimension (6 values)
+            discrete_dims: dict[int, list[float]] = {1: valid_choices}
+            # Constraint: x[1] >= 20 (discrete dim must be at least 20)
+            inequality_constraints = [
+                (
+                    torch.tensor([1], dtype=torch.long, device=self.device),
+                    torch.tensor([1.0], dtype=dtype, device=self.device),
+                    20.0,
+                )
+            ]
+            X, _ = optimize_acqf_mixed_alternating(
+                acq_function=acqf,
+                bounds=bounds,
+                discrete_dims=discrete_dims,
+                q=1,
+                num_restarts=2,
+                raw_samples=32,
+                inequality_constraints=inequality_constraints,
+                options={"max_discrete_values": 2, "maxiter_alternating": 4},
+            )
+            # Assert: discrete value is within the valid set (not just rounded int)
+            valid_choices_tensor = torch.tensor(
+                valid_choices, dtype=dtype, device=self.device
+            )
+            self.assertTrue(
+                torch.all(torch.isin(X[..., 1], valid_choices_tensor)),
+                f"Returned candidate {X[..., 1].item()} not in {valid_choices}",
+            )
+            # Assert: constraint is satisfied (x[1] >= 20)
+            self.assertTrue(torch.all(X[..., 1] >= 20.0 - 1e-6))
diff --git a/test/optim/test_parameter_constraints.py b/test/optim/test_parameter_constraints.py
@@ -1003,7 +1003,9 @@ def test_project_to_feasible_space_via_slsqp_exception(self, _: mock.Mock) -> No
         bounds = torch.tensor([[0.0, 0.0], [2.0, 2.0]], device=self.device)
 
         X = torch.tensor([[1.0, 1.0]], device=self.device)
-        with self.assertRaisesRegex(RuntimeError, "Optimization failed: failed reason"):
+        with self.assertRaisesRegex(
+            CandidateGenerationError, "Optimization failed: failed reason"
+        ):
             project_to_feasible_space_via_slsqp(
                 X=X,
                 bounds=bounds,
@@ -1015,3 +1017,92 @@ def test_project_to_feasible_space_via_slsqp_exception(self, _: mock.Mock) -> No
                     )
                 ],
             )
+
+    def test_project_to_feasible_space_with_scalar_fixed_features(self) -> None:
+        """Test projection preserves scalar fixed_features values."""
+        for dtype in (torch.float, torch.double):
+            tol = get_constraint_tolerance(dtype=dtype)
+            # Setup: 3D search space, bounds [[0, 0, 0], [2, 2, 2]]
+            bounds = torch.tensor(
+                [[0.0, 0.0, 0.0], [2.0, 2.0, 2.0]], dtype=dtype, device=self.device
+            )
+            # Constraint: x[0] + x[1] >= 1.5
+            inequality_constraints = [
+                (
+                    torch.tensor([0, 1], dtype=torch.long, device=self.device),
+                    torch.tensor([1.0, 1.0], dtype=dtype, device=self.device),
+                    1.5,
+                )
+            ]
+            # Infeasible point X = [[0.3, 0.3, 1.0]] (0.6 < 1.5)
+            X = torch.tensor([[0.3, 0.3, 1.0]], dtype=dtype, device=self.device)
+            # fixed_features = {0: 0.3} (scalar)
+            fixed_features: dict[int, float | torch.Tensor] = {0: 0.3}
+            # Execute: project to feasible space with fixed_features
+            projected = project_to_feasible_space_via_slsqp(
+                X=X,
+                bounds=bounds,
+                inequality_constraints=inequality_constraints,
+                fixed_features=fixed_features,
+            )
+            # Assert: x[0] remains at 0.3 (fixed)
+            self.assertAllClose(
+                projected[0, 0], torch.tensor(0.3, dtype=dtype, device=self.device)
+            )
+            # Assert: constraint is satisfied (x[0] + x[1] >= 1.5)
+            self.assertGreaterEqual(
+                (projected[0, 0] + projected[0, 1]).item(), 1.5 - tol
+            )
+            # Assert: bounds are respected
+            self.assertTrue(torch.all(projected >= bounds[0] - tol))
+            self.assertTrue(torch.all(projected <= bounds[1] + tol))
+
+    def test_project_to_feasible_space_with_batched_fixed_features(self) -> None:
+        """Test projection preserves batched (tensor) fixed_features values."""
+        for dtype in (torch.float, torch.double):
+            tol = get_constraint_tolerance(dtype=dtype)
+            # Setup: 3D search space, bounds [[0, 0, 0], [2, 2, 2]]
+            bounds = torch.tensor(
+                [[0.0, 0.0, 0.0], [2.0, 2.0, 2.0]], dtype=dtype, device=self.device
+            )
+            # Constraint: x[0] + x[1] >= 1.5
+            inequality_constraints = [
+                (
+                    torch.tensor([0, 1], dtype=torch.long, device=self.device),
+                    torch.tensor([1.0, 1.0], dtype=dtype, device=self.device),
+                    1.5,
+                )
+            ]
+            # Batch of 3 infeasible points (all violate x[0] + x[1] >= 1.5)
+            # X must be 3D: batch x q x d when using tensor fixed_features
+            X = torch.tensor(
+                [
+                    [[0.2, 0.3, 1.0]],  # batch 0, q=1
+                    [[0.4, 0.5, 0.5]],  # batch 1, q=1
+                    [[0.1, 0.2, 1.5]],  # batch 2, q=1
+                ],
+                dtype=dtype,
+                device=self.device,
+            )  # Shape: [3, 1, 3]
+            # fixed_features = {0: tensor([0.2, 0.4, 0.1])} (different per batch)
+            fixed_values = torch.tensor(
+                [0.2, 0.4, 0.1], dtype=dtype, device=self.device
+            )
+            fixed_features: dict[int, float | torch.Tensor] = {0: fixed_values}
+            # Execute: project to feasible space with batched fixed_features
+            projected = project_to_feasible_space_via_slsqp(
+                X=X,
+                bounds=bounds,
+                inequality_constraints=inequality_constraints,
+                fixed_features=fixed_features,
+            )
+            # Assert: each batch element preserves its respective fixed value for x[0]
+            self.assertAllClose(projected[:, 0, 0], fixed_values)
+            # Assert: constraint is satisfied for each batch element
+            for i in range(3):
+                self.assertGreaterEqual(
+                    (projected[i, 0, 0] + projected[i, 0, 1]).item(), 1.5 - tol
+                )
+            # Assert: bounds are respected
+            self.assertTrue(torch.all(projected >= bounds[0] - tol))
+            self.assertTrue(torch.all(projected <= bounds[1] + tol))

| Original file line number | Diff line number | Diff line change |
| --- | --- | --- |
| | | `@@ -535,6 +535,7 @@ def _optimize_batch_candidates() -> tuple[Tensor, Tensor, list[Warning]]:` |
| `535` | `535` | `bounds=opt_inputs.bounds,` |
| `536` | `536` | `equality_constraints=equality_constraints,` |
| `537` | `537` | `inequality_constraints=inequality_constraints,` |
| | `538` | `+ fixed_features=opt_inputs.fixed_features,` |
| `538` | `539` | `)` |
| `539` | `540` | `if opt_inputs.post_processing_func is not None:` |
| `540` | `541` | `projected_candidates = opt_inputs.post_processing_func(projected_candidates)` |