Add DataAllocator (facebookresearch#789)

JasonKChow · facebook-github-bot · commit eec28622c196 · 2025-04-29T16:32:56.000-07:00
Summary:

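DataAllocator is an inducing point allocator that returns the input data itself (as a detached clone) as the inducing points. When inputs are given, num_inducing is ignored, and a warning is issued if it is smaller than the number of input points.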

Differential Revision: D73885655
diff --git a/aepsych/models/inducing_points/__init__.py b/aepsych/models/inducing_points/__init__.py
@@ -8,12 +8,14 @@
 import sys
 
 from ...config import Config
+from .data import DataAllocator
 from .fixed import FixedAllocator, FixedPlusAllocator
 from .greedy_variance_reduction import GreedyVarianceReduction
 from .kmeans import KMeansAllocator
 from .sobol import SobolAllocator
 
 __all__ = [
+    "DataAllocator",
     "FixedAllocator",
     "FixedPlusAllocator",
     "GreedyVarianceReduction",
diff --git a/aepsych/models/inducing_points/data.py b/aepsych/models/inducing_points/data.py
@@ -0,0 +1,57 @@
+#!/usr/bin/env python3
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+
+# This source code is licensed under the license found in the
+# LICENSE file in the root directory of this source tree.
+
+import warnings
+
+import torch
+from aepsych.models.inducing_points.base import BaseAllocator, EMPTY_SIZE
+
+
+class DataAllocator(BaseAllocator):
+    def __init__(
+        self,
+        dim: int,
+    ) -> None:
+        """Initialize the DataAllocator. This allocator simply returns the input
+        data to use as the inducing points.
+
+        Args:
+            dim (int): Dimensionality of the search space.
+        """
+        super().__init__(dim=dim)
+
+    def allocate_inducing_points(
+        self,
+        inputs: torch.Tensor | None = None,
+        covar_module: torch.nn.Module | None = None,
+        num_inducing: int = 100,
+        input_batch_shape: torch.Size = EMPTY_SIZE,
+    ) -> torch.Tensor:
+        """Allocate inducing points by returning a detached clone of the inputs.
+
+        Args:
+            inputs (torch.Tensor, optional): Input tensor, cloned and returned as inducing points. If None, dummy points are returned.
+            covar_module (torch.nn.Module, optional): Kernel covariance module; included for API compatibility, but not used here.
+            num_inducing (int, optional): Forwarded to _allocate_dummy_points when inputs is None. Otherwise ignored: all input
+                points are returned, and a UserWarning is issued if num_inducing is smaller than inputs.shape[0].
+            input_batch_shape (torch.Size, optional): Batch shape; included for API compatibility, but not used here.
+
+        Returns:
+            torch.Tensor: A detached clone of the inputs, or the result of _allocate_dummy_points when inputs is None.
+        """
+        if inputs is None:  # No inputs given: fall back to dummy points
+            return self._allocate_dummy_points(num_inducing=num_inducing)
+
+        if num_inducing < inputs.shape[0]:
+            warnings.warn(
+                f"DataAllocator ignores num_inducing={num_inducing} and returns all input points.",
+                UserWarning,
+                stacklevel=2,
+            )
+
+        self.last_allocator_used = self.__class__  # Record which allocator produced the points
+        return inputs.clone().detach()
diff --git a/tests/test_points_allocators.py b/tests/test_points_allocators.py
@@ -11,13 +11,14 @@
 from aepsych.config import Config
 from aepsych.models.gp_classification import GPClassificationModel
 from aepsych.models.inducing_points import (
+    DataAllocator,
     FixedAllocator,
     FixedPlusAllocator,
     GreedyVarianceReduction,
     KMeansAllocator,
     SobolAllocator,
 )
-from aepsych.strategy import Strategy
+from aepsych.strategy import SequentialStrategy, Strategy
 from aepsych.transforms.parameters import ParameterTransforms, transform_options
 from sklearn.datasets import make_classification
 
@@ -482,6 +483,79 @@ def test_fixed_plus_allocator_dimension_mismatch(self):
                 main_allocator=KMeansAllocator,
             )
 
+    def test_data_allocator(self):
+        """Test DataAllocator with inputs, without inputs, and with a too-small num_inducing."""
+        allocator = DataAllocator(dim=2)
+        inputs = torch.tensor([[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]])
+
+        # With inputs: returns the input data and records itself in last_allocator_used
+        inducing_points = allocator.allocate_inducing_points(
+            inputs=inputs, num_inducing=10
+        )
+        self.assertTrue(torch.equal(inducing_points, inputs))
+        self.assertIs(allocator.last_allocator_used, DataAllocator)
+        self.assertIsNot(inducing_points, inputs)  # Should be a clone (only object identity is checked here)
+
+        # Without inputs: dummy points of shape (num_inducing, dim), all zeros
+        inducing_points = allocator.allocate_inducing_points(num_inducing=10)
+        self.assertEqual(inducing_points.shape, (10, 2))
+        self.assertTrue(torch.all(inducing_points == 0))
+
+        # num_inducing smaller than the number of input points: warns once, still returns all inputs
+        with self.assertWarns(UserWarning) as w:
+            inducing_points = allocator.allocate_inducing_points(
+                inputs=inputs, num_inducing=2
+            )
+
+        self.assertEqual(len(w.warnings), 1)
+        self.assertIn("DataAllocator ignores num_inducing=2", w.warning.args[0])
+        self.assertTrue(torch.all(inducing_points == inputs))
+
+    def test_data_allocator_config_smoketest(self):
+        """Test that a config-built model using DataAllocator has the collected data as its inducing points."""
+        # Config: a Sobol init strategy, then a model-based strategy whose GPClassificationModel uses DataAllocator
+        config_str = """
+            [common]
+            parnames = [par1]
+            stimuli_per_trial = 1
+            outcome_types = [binary]
+            strategy_names = [init_strat, opt_strat]
+
+            [par1]
+            par_type = continuous
+            lower_bound = 0
+            upper_bound = 1
+
+            [init_strat]
+            generator = SobolGenerator
+            min_asks = 2
+
+            [opt_strat]
+            generator = OptimizeAcqfGenerator
+            min_asks = 1
+            model = GPClassificationModel
+
+            [GPClassificationModel]
+            inducing_point_method = DataAllocator
+            inducing_size = 2
+
+            [OptimizeAcqfGenerator]
+            acqf = MCLevelSetEstimation
+        """
+
+        config = Config()
+        config.update(config_str=config_str)
+        strat = SequentialStrategy.from_config(config)
+
+        for response in [0, 1]:  # Two ask/tell rounds, matching init_strat's min_asks = 2
+            point = strat.gen()
+            strat.add_data(point, torch.tensor([response]))
+
+        point = strat.gen()  # presumably triggers the model fit on the collected data; TODO confirm
+        self.assertTrue(
+            torch.all(strat.model.variational_strategy.inducing_points == strat.x)
+        )
+
 
 if __name__ == "__main__":
     unittest.main()