Add InSampleUniformGenerator for model-free in-sample candidate selection (facebook#4987)

ItsMrLin · meta-codesync[bot] · commit a89122dfca42 · 2026-03-06T09:42:08.000-08:00
Summary:
Pull Request resolved: facebook#4987

Add `InSampleUniformGenerator`, a `RandomGenerator` subclass that randomly selects `n` arms (without replacement) from existing experiment arms. This replaces the previous `in_sample` mode on `RandomAdapter` (which bypassed the generator entirely) with a proper generator class, following bletham's review feedback on the original diff.

The generator overrides `gen()` to select from the `generated_points` array that the adapter already constructs from in-design, non-failed experiment arms (filtered, transformed, and deduplicated). This reuses existing infrastructure without adding new interface surface.

Registered as `Generators.IN_SAMPLE_UNIFORM` in the adapter registry with `RandomAdapter` and `Cont_X_trans`, matching the pattern of other random generators (Sobol, Uniform).

The resulting user-facing matrix is clean:

| | Out-of-sample | In-sample |
|---|---|---|
| **Model-free** | `Generators.SOBOL` | `Generators.IN_SAMPLE_UNIFORM` |
| **Model-based** | `Generators.BOTORCH_MODULAR` | `Generators.BOTORCH_MODULAR` + `model_gen_options={"in_sample": True}` |

The asymmetry (enum swap for model-free, flag for model-based) reflects the real architectural difference: model-based in/out-of-sample share a fitted model; model-free in/out-of-sample share nothing.

Reviewed By: bletham, saitcakmak

Differential Revision: D94973263

fbshipit-source-id: 1cbaa958cf0514caf6b9d91f738e39f2c2cb516b
diff --git a/ax/adapter/registry.py b/ax/adapter/registry.py
@@ -56,6 +56,7 @@
 from ax.generators.discrete.eb_thompson import EmpiricalBayesThompsonSampler
 from ax.generators.discrete.full_factorial import FullFactorialGenerator
 from ax.generators.discrete.thompson import ThompsonSampler
+from ax.generators.random.in_sample import InSampleUniformGenerator
 from ax.generators.random.sobol import SobolGenerator
 from ax.generators.random.uniform import UniformGenerator
 from ax.generators.torch.botorch_modular.generator import (
@@ -215,6 +216,11 @@ class GeneratorSetup(NamedTuple):
         generator_class=UniformGenerator,
         transforms=Cont_X_trans,
     ),
+    "InSampleUniform": GeneratorSetup(
+        adapter_class=RandomAdapter,
+        generator_class=InSampleUniformGenerator,
+        transforms=Cont_X_trans,
+    ),
     "ST_MTGP": GeneratorSetup(
         adapter_class=TorchAdapter,
         generator_class=ModularBoTorchGenerator,
@@ -454,6 +460,7 @@ class Generators(GeneratorRegistryBase):
     EMPIRICAL_BAYES_THOMPSON = "EB"
     EB_ASHR = "EB_Ashr"
     UNIFORM = "Uniform"
+    IN_SAMPLE_UNIFORM = "InSampleUniform"
     ST_MTGP = "ST_MTGP"
     BO_MIXED = "BO_MIXED"
 
diff --git a/ax/generators/random/in_sample.py b/ax/generators/random/in_sample.py
@@ -0,0 +1,83 @@
+#!/usr/bin/env python3
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+#
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+
+# pyre-strict
+
+from collections.abc import Callable
+
+import numpy as np
+import numpy.typing as npt
+from ax.core.search_space import SearchSpaceDigest
+from ax.generators.random.base import RandomGenerator
+from ax.generators.types import TConfig
+
+
+class InSampleUniformGenerator(RandomGenerator):
+    """Pick candidates at random from arms already on the experiment.
+
+    Draws ``n`` rows uniformly, without replacement, from the
+    ``generated_points`` array supplied by the adapter, which holds the
+    in-design, non-failed arms of the experiment (deduplicated).
+
+    Meant for model-free candidate selection, e.g. LILO
+    (Language-in-the-Loop Optimization), where a labeling node re-picks
+    previously observed configurations and no surrogate model is fit.
+
+    Model attributes are described on the base ``RandomGenerator``.
+    """
+
+    def gen(
+        self,
+        n: int,
+        search_space_digest: SearchSpaceDigest,
+        linear_constraints: tuple[npt.NDArray, npt.NDArray] | None = None,
+        fixed_features: dict[int, float] | None = None,
+        model_gen_options: TConfig | None = None,
+        rounding_func: Callable[[npt.NDArray], npt.NDArray] | None = None,
+        generated_points: npt.NDArray | None = None,
+    ) -> tuple[npt.NDArray, npt.NDArray]:
+        """Draw ``n`` rows from ``generated_points`` without replacement.
+
+        Args:
+            n: How many candidates to draw.
+            search_space_digest: ``SearchSpaceDigest`` describing the
+                features. Not read by this method.
+            linear_constraints: Ignored; kept for interface compatibility.
+            fixed_features: Ignored; kept for interface compatibility.
+            model_gen_options: Ignored; kept for interface compatibility.
+            rounding_func: Ignored; kept for interface compatibility.
+            generated_points: ``(num_arms, d)`` array of existing arms to
+                draw from, as assembled by the adapter.
+
+        Returns:
+            A pair ``(points, weights)``: the ``(n, d)`` array of drawn
+            rows and an ``n``-vector of ones (uniform weights).
+
+        Raises:
+            ValueError: When ``generated_points`` is None or contains
+                fewer than ``n`` rows.
+        """
+        num_eligible = len(generated_points) if generated_points is not None else 0
+        if generated_points is None or n > num_eligible:
+            message = (
+                f"Cannot select {n} arms: only {num_eligible} eligible "
+                "arms available on the experiment."
+            )
+            raise ValueError(message)
+
+        # Seed is offset by init_position, which advances by n on every call.
+        sampler = np.random.default_rng(seed=self.seed + self.init_position)
+        chosen_rows = sampler.choice(num_eligible, size=n, replace=False)
+        self.init_position += n
+        return generated_points[chosen_rows], np.ones(n)
+
+    def _gen_samples(self, n: int, tunable_d: int, bounds: npt.NDArray) -> npt.NDArray:
+        """Unsupported: this generator only re-selects existing points."""
+        reason = (
+            "InSampleUniformGenerator selects from existing points "
+            "and does not generate new samples."
+        )
+        raise NotImplementedError(reason)
diff --git a/ax/generators/tests/test_in_sample.py b/ax/generators/tests/test_in_sample.py
@@ -0,0 +1,123 @@
+#!/usr/bin/env python3
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+#
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+
+# pyre-strict
+
+import numpy as np
+from ax.core.search_space import SearchSpaceDigest
+from ax.generators.random.in_sample import InSampleUniformGenerator
+from ax.utils.common.testutils import TestCase
+
+
+class InSampleUniformGeneratorTest(TestCase):
+    def setUp(self) -> None:
+        """Build a five-row, two-feature pool and a matching digest."""
+        super().setUp()
+        pool_rows = [
+            [0.1, 0.2],
+            [0.3, 0.4],
+            [0.5, 0.6],
+            [0.7, 0.8],
+            [0.9, 1.0],
+        ]
+        self.generated_points = np.array(pool_rows)
+        self.ssd = SearchSpaceDigest(
+            feature_names=["x0", "x1"],
+            bounds=[(0.0, 1.0), (0.0, 1.0)],
+        )
+
+    def test_basic_selection(self) -> None:
+        """Two draws come back as rows of the pool with unit weights."""
+        sampler = InSampleUniformGenerator(seed=0)
+        picked, wts = sampler.gen(
+            n=2,
+            search_space_digest=self.ssd,
+            generated_points=self.generated_points,
+        )
+        self.assertEqual(picked.shape, (2, 2))
+        self.assertTrue(np.all(wts == 1.0))
+        # Membership check: every returned row equals some pool row.
+        pool = self.generated_points
+        for picked_row in picked:
+            self.assertTrue(any(np.array_equal(picked_row, p) for p in pool))
+
+    def test_selects_all(self) -> None:
+        """Asking for every row returns the whole pool in some order."""
+        sampler = InSampleUniformGenerator(seed=0)
+        picked, wts = sampler.gen(
+            n=5,
+            search_space_digest=self.ssd,
+            generated_points=self.generated_points,
+        )
+        self.assertEqual(picked.shape, (5, 2))
+        self.assertTrue(np.all(wts == 1.0))
+        # Compare as sets of row-tuples so that ordering is ignored; the
+        # shape check above already pins the number of rows returned.
+        picked_rows = set(map(tuple, picked.tolist()))
+        pool_rows = set(map(tuple, self.generated_points.tolist()))
+        self.assertEqual(picked_rows, pool_rows)
+
+    def test_not_enough_points(self) -> None:
+        """Requesting six rows from a pool of five raises ValueError."""
+        sampler = InSampleUniformGenerator(seed=0)
+        pool = self.generated_points
+        # Five rows are available, so asking for six must fail.
+        expected_msg = "Cannot select 6 arms"
+        with self.assertRaisesRegex(ValueError, expected_msg):
+            sampler.gen(n=6, search_space_digest=self.ssd, generated_points=pool)
+
+    def test_no_generated_points(self) -> None:
+        """A missing pool is reported as zero eligible arms."""
+        sampler = InSampleUniformGenerator(seed=0)
+        # With no pool at all the error is expected to report zero
+        # eligible arms, as spelled out in the message asserted below.
+        expected_msg = "Cannot select 1 arms: only 0"
+        with self.assertRaisesRegex(ValueError, expected_msg):
+            sampler.gen(n=1, search_space_digest=self.ssd, generated_points=None)
+
+    def test_reproducibility(self) -> None:
+        """Two generators built with the same seed make the same first draw."""
+        first_sampler = InSampleUniformGenerator(seed=42)
+        second_sampler = InSampleUniformGenerator(seed=42)
+        pool = self.generated_points
+
+        first_draw, _ = first_sampler.gen(
+            n=2, search_space_digest=self.ssd, generated_points=pool
+        )
+        second_draw, _ = second_sampler.gen(
+            n=2, search_space_digest=self.ssd, generated_points=pool
+        )
+        # Both generators were constructed identically, so their first
+        # draws must agree row for row, including order.
+        self.assertTrue(np.array_equal(first_draw, second_draw))
+
+    def test_different_selections_across_calls(self) -> None:
+        """Back-to-back calls advance init_position and change the draw."""
+        sampler = InSampleUniformGenerator(seed=0)
+        pool = self.generated_points
+
+        first_draw, _ = sampler.gen(
+            n=2, search_space_digest=self.ssd, generated_points=pool
+        )
+        self.assertEqual(sampler.init_position, 2)
+
+        second_draw, _ = sampler.gen(
+            n=2, search_space_digest=self.ssd, generated_points=pool
+        )
+        self.assertEqual(sampler.init_position, 4)
+
+        # The second call runs at a later init_position, so for this fixed
+        # seed the two ordered draws are expected to differ.
+        self.assertFalse(np.array_equal(first_draw, second_draw))
+
+    def test_gen_samples_raises(self) -> None:
+        """The sampling hook is intentionally unsupported."""
+        sampler = InSampleUniformGenerator()
+        unit_box = np.array([[0.0, 1.0], [0.0, 1.0]])
+        # The generator only re-selects existing rows, so the
+        # sample-drawing hook is expected to raise.
+        with self.assertRaises(NotImplementedError):
+            sampler._gen_samples(n=1, tunable_d=2, bounds=unit_box)