Skip to content

Commit 71a48f2

Browse files
JasonKChow authored and facebook-github-bot committed
Mixed input inducing point allocators (#792)
Summary: Add mixed input inducing point allocators. Create two variations to support mixed input variational models. Each allocator wraps another allocator to handle the continuous inducing points. The SubsetMixedAllocator focuses primarily on the continuous allocator: it first lets the continuous allocator generate points, then randomly selects a discrete point to pair with each continuous point. The AllMixedAllocator focuses on the discrete parameters, creating a continuous inducing point for each possible discrete point given the search space. Currently there are no tests validating that these allocators work when using config; those will come alongside mixed input models. Differential Revision: D73936883
1 parent 930bebf commit 71a48f2

3 files changed

Lines changed: 449 additions & 0 deletions

File tree

aepsych/models/inducing_points/__init__.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,15 +12,18 @@
1212
from .fixed import FixedAllocator, FixedPlusAllocator
1313
from .greedy_variance_reduction import GreedyVarianceReduction
1414
from .kmeans import KMeansAllocator
15+
from .mixed import AllMixedAllocator, SubsetMixedAllocator
1516
from .sobol import SobolAllocator
1617

1718
__all__ = [
19+
"AllMixedAllocator",
1820
"DataAllocator",
1921
"FixedAllocator",
2022
"FixedPlusAllocator",
2123
"GreedyVarianceReduction",
2224
"KMeansAllocator",
2325
"SobolAllocator",
26+
"SubsetMixedAllocator",
2427
]
2528

2629
Config.register_module(sys.modules[__name__])
Lines changed: 247 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,247 @@
1+
#!/usr/bin/env python3
2+
# Copyright (c) Meta Platforms, Inc. and affiliates.
3+
# All rights reserved.
4+
5+
# This source code is licensed under the license found in the
6+
# LICENSE file in the root directory of this source tree.
7+
8+
from functools import cached_property
9+
from typing import Any
10+
11+
import torch
12+
from aepsych.config import Config
13+
from aepsych.models.inducing_points.base import BaseAllocator, EMPTY_SIZE
14+
15+
16+
class MixedBaseAllocator(BaseAllocator):
    """Common machinery for inducing point allocators over mixed search spaces.

    The search space is split into continuous and categorical columns. The
    continuous columns are handled by a wrapped continuous allocator, while
    subclasses decide how categorical configurations are paired with the
    continuous inducing points in their ``allocate_inducing_points`` method.
    """

    def __init__(
        self,
        dim: int,
        categorical_params: dict[int, int],
        continuous_allocator: type[BaseAllocator],
        **kwargs: Any,
    ) -> None:
        """Set up the index bookkeeping and build the continuous allocator.

        Args:
            dim (int): Dimensionality of the mixed search space.
            categorical_params (dict[int, int]): Maps the column index of each
                categorical parameter to its number of options.
            continuous_allocator (type[BaseAllocator]): The allocator class used
                for the continuous parameters. It is instantiated here with
                ``dim`` set to the number of continuous columns plus **kwargs.
            **kwargs: Keyword arguments forwarded to the continuous_allocator
                constructor.

        Raises:
            ValueError: If a trial allocation from the continuous allocator does
                not have one column per continuous parameter.
        """
        super().__init__(dim=dim)
        self.categorical_params = categorical_params
        self.categorical_idxs = sorted(categorical_params)

        # Every column that is not categorical is continuous; iterating over
        # range(dim) keeps the indices in ascending order.
        categorical_set = set(self.categorical_idxs)
        self.continuous_idxs = [
            col for col in range(dim) if col not in categorical_set
        ]
        n_continuous = len(self.continuous_idxs)

        # The wrapped allocator only ever sees the continuous columns.
        self.continuous_allocator = continuous_allocator(dim=n_continuous, **kwargs)

        # Probe the wrapped allocator once (without data) to catch kwargs that
        # were written for the full mixed space instead of the continuous part.
        probe = self.continuous_allocator.allocate_inducing_points(inputs=None)
        n_produced = probe.shape[1]
        if n_produced != n_continuous:
            raise ValueError(
                "The continuous allocator does not produce the right shape. "
                f"Got {n_produced} and expected {n_continuous}. "
                "The kwargs for the continuous allocator should be chosen as if only "
                "the continuous parameters were present."
            )

    @cached_property
    def categorical_points(self) -> torch.Tensor:
        """Return every categorical configuration of the categorical parameters.

        The result is the Cartesian product of ``arange(n_options)`` over the
        categorical parameters (in ascending column order), one row per
        configuration. It is computed once and cached on the instance.

        Returns:
            torch.Tensor: 2-D tensor of all categorical configurations.
        """
        option_ranges = [
            torch.arange(self.categorical_params[col]) for col in self.categorical_idxs
        ]
        grid = torch.cartesian_prod(*option_ranges)

        # With a single categorical parameter the product comes back 1-D;
        # promote it to a column so callers always get (n_configs, n_params).
        return grid.unsqueeze(1) if len(grid.shape) == 1 else grid

    def _split_inputs(self, inputs: torch.Tensor) -> tuple[torch.Tensor, torch.Tensor]:
        """Separate the continuous columns from the categorical columns.

        Args:
            inputs (torch.Tensor): Input tensor indexed as ``inputs[:, columns]``.

        Returns:
            tuple[torch.Tensor, torch.Tensor]: The continuous columns followed by
                the categorical columns of the input.
        """
        continuous_part = inputs[:, self.continuous_idxs]
        categorical_part = inputs[:, self.categorical_idxs]
        return continuous_part, categorical_part

    def _combine_inducing_points(
        self, continuous_induc: torch.Tensor, categorical_induc: torch.Tensor
    ) -> torch.Tensor:
        """Interleave continuous and categorical inducing points column-wise.

        Args:
            continuous_induc (torch.Tensor): Continuous inducing points.
            categorical_induc (torch.Tensor): Categorical inducing points.

        Returns:
            torch.Tensor: Inducing points with ``self.dim`` columns, using the
                dtype and device of ``continuous_induc``.
        """
        # Allocate the output directly with the continuous points' dtype/device;
        # every column is overwritten below, so the initial contents are irrelevant.
        combined = torch.empty(
            (continuous_induc.shape[0], self.dim),
            dtype=continuous_induc.dtype,
            device=continuous_induc.device,
        )
        combined[:, self.continuous_idxs] = continuous_induc
        combined[:, self.categorical_idxs] = categorical_induc.to(continuous_induc)

        return combined

    @classmethod
    def get_config_options(
        cls,
        config: Config,
        name: str | None = None,
        options: dict[str, Any] | None = None,
    ) -> dict[str, Any]:
        """Build the constructor options for a mixed allocator from a config.

        Extends the parent's options with ``categorical_params``, derived from
        every parameter whose ``par_type`` is ``"categorical"``.

        Args:
            config (Config): Configuration object.
            name (str, optional): Name of the allocator, defaults to None.
                Passed through to the parent implementation.
            options (dict[str, Any], optional): Additional options, defaults to None.

        Returns:
            dict[str, Any]: Configuration options including ``categorical_params``.
        """
        options = super().get_config_options(config, name, options)

        par_names = config.getlist("common", "parnames", element_type=str)

        # Column index -> number of options, for categorical parameters only.
        options["categorical_params"] = {
            position: len(config.getlist(par_name, "options"))
            for position, par_name in enumerate(par_names)
            if config.get(par_name, "par_type") == "categorical"
        }

        # TODO: Check if we need extra logic to initialize the continuous allocator

        return options
140+
141+
142+
class SubsetMixedAllocator(MixedBaseAllocator):
    """Inducing point allocator for mixed input models that places continuous inducing points
    on a random subset of the categorical configurations.
    """

    def allocate_inducing_points(
        self,
        inputs: torch.Tensor | None = None,
        covar_module: torch.nn.Module | None = None,
        num_inducing: int = 100,
        input_batch_shape: torch.Size = EMPTY_SIZE,
    ) -> torch.Tensor:
        """Allocate inducing points by pairing each continuous inducing point with
        a randomly drawn categorical configuration.

        Args:
            inputs (torch.Tensor, optional): Input tensor containing both continuous and categorical parts.
                If None, the result of ``_allocate_dummy_points(num_inducing)`` is returned.
            covar_module (torch.nn.Module, optional): Kernel covariance module, forwarded to the
                continuous allocator.
            num_inducing (int, optional): The number of inducing points requested from the
                continuous allocator. Defaults to 100.
            input_batch_shape (torch.Size, optional): Batch shape, defaults to an empty size.

        Returns:
            torch.Tensor: The allocated inducing points, one row per continuous inducing
                point returned by the continuous allocator.
        """
        if inputs is None:
            return self._allocate_dummy_points(num_inducing)

        # Only the continuous columns are passed to the continuous allocator
        x_continuous = self._split_inputs(inputs)[0]

        # Create continuous inducing points
        continuous_induc = self.continuous_allocator.allocate_inducing_points(
            inputs=x_continuous,
            covar_module=covar_module,
            num_inducing=num_inducing,
            input_batch_shape=input_batch_shape,
        )

        # Draw one categorical configuration (with replacement) per continuous
        # inducing point. The count comes from what the continuous allocator
        # actually returned rather than from num_inducing: the allocator is not
        # guaranteed to return exactly the requested number of points, and a
        # mismatch would make the column assignment in _combine_inducing_points
        # fail on shape.
        n_points = continuous_induc.shape[0]
        idx = torch.randint(0, self.categorical_points.shape[0], (n_points,))
        # Indexing with a tensor already yields a fresh tensor, so the cached
        # categorical_points is never aliased.
        categorical_induc = self.categorical_points[idx]

        # Combine continuous and categorical inducing points
        inducing_points = self._combine_inducing_points(
            continuous_induc=continuous_induc, categorical_induc=categorical_induc
        )

        self.last_allocator_used = self.__class__
        return inducing_points
191+
192+
193+
class AllMixedAllocator(MixedBaseAllocator):
    """Inducing point allocator for mixed input models that places one continuous
    inducing point on every configuration of the categorical parameters. The number
    of configurations grows multiplicatively with the categorical options, so this
    probably doesn't scale very well and should primarily be used for analysis.
    """

    def allocate_inducing_points(
        self,
        inputs: torch.Tensor | None = None,
        covar_module: torch.nn.Module | None = None,
        num_inducing: int = 100,
        input_batch_shape: torch.Size = EMPTY_SIZE,
    ) -> torch.Tensor:
        """Allocate one inducing point per categorical configuration.

        Args:
            inputs (torch.Tensor, optional): Input tensor containing both continuous and categorical parts.
                If None, the result of ``_allocate_dummy_points(num_inducing)`` is returned.
            covar_module (torch.nn.Module, optional): Kernel covariance module, forwarded to the
                continuous allocator.
            num_inducing (int, optional): Only forwarded to ``_allocate_dummy_points`` when
                ``inputs`` is None. Otherwise ignored, as the number of inducing points is the
                number of categorical configurations.
            input_batch_shape (torch.Size, optional): Batch shape, defaults to an empty size.

        Returns:
            torch.Tensor: The allocated inducing points.

        Raises:
            ValueError: If the continuous allocator does not return exactly one point
                per categorical configuration.
        """
        if inputs is None:
            return self._allocate_dummy_points(num_inducing)

        # The continuous allocator only works on the continuous columns
        continuous_inputs, _ = self._split_inputs(inputs)

        # Request exactly one continuous inducing point per configuration
        n_configs = self.categorical_points.shape[0]
        continuous_induc = self.continuous_allocator.allocate_inducing_points(
            inputs=continuous_inputs,
            covar_module=covar_module,
            num_inducing=n_configs,
            input_batch_shape=input_batch_shape,
        )

        # Rows are paired one-to-one below, so the counts have to match exactly
        n_allocated = continuous_induc.shape[0]
        if n_allocated != n_configs:
            raise ValueError(
                "The continuous allocator did not produce enough inducing points, this "
                "likely means the continuous allocator is not compatible with the AllMixedAllocator. "
                f"Got {n_allocated} and {n_configs}."
            )

        # Pair the i-th continuous point with the i-th configuration; clone so
        # the cached configurations are never handed out directly.
        combined = self._combine_inducing_points(
            continuous_induc=continuous_induc,
            categorical_induc=self.categorical_points.clone(),
        )

        self.last_allocator_used = self.__class__
        return combined

0 commit comments

Comments
 (0)