SamuelGabriel
diff --git a/‎botorch_community/acquisition/discretized.py
Lines changed: 146 additions & 0 deletions b/‎botorch_community/acquisition/discretized.py
Lines changed: 146 additions & 0 deletions
diff --git a/‎botorch_community/models/prior_fitted_network.py
Lines changed: 96 additions & 0 deletions b/‎botorch_community/models/prior_fitted_network.py
Lines changed: 96 additions & 0 deletions
diff --git a/‎botorch_community/models/utils/prior_fitted_network.py
Lines changed: 73 additions & 0 deletions b/‎botorch_community/models/utils/prior_fitted_network.py
Lines changed: 73 additions & 0 deletions
@@ -0,0 +1,146 @@
+#!/usr/bin/env python3
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+#
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+
+r"""Acquisition functions for models with discretized (Riemann) posteriors."""
+
+from __future__ import annotations
+
+from abc import ABC, abstractmethod
+
+import torch
+
+from botorch.acquisition import AcquisitionFunction
+from botorch.models.model import Model
+from botorch.utils.transforms import t_batch_mode_transform
+from torch import Tensor
+
+
+class DiscretizedAcquistionFunction(AcquisitionFunction, ABC):
+    r"""DiscretizedAcquistionFunction is an abstract base class for acquisition
+    functions that are defined on discrete (Riemann-distributed) posteriors.
+
+    It wraps a model whose `posterior(X)` returns an object with an `integrate`
+    method (e.g. `BoundedRiemannPosterior`) and implements `forward`, which
+    evaluates the acquisition function at a given set of points.
+
+    The acquisition function must have the form $$acq(x) = \int p(y|x) ag(y) dy$$,
+    where $$ag$$ is defined differently for each acquisition function.
+    Subclasses define a specific acquisition function by implementing
+    `ag_integrate`, which handles $$ag$$ between two bucket borders.
+    """
+
+    def __init__(self, model: Model) -> None:
+        r"""Initialize the acquisition function.
+
+        Args:
+            model: The model whose posterior is integrated in `forward`.
+        """
+        super().__init__(model=model)
+
+    @t_batch_mode_transform(expected_q=1)
+    def forward(self, X: Tensor) -> Tensor:
+        r"""Evaluate the acquisition function on the candidate set X.
+
+        Args:
+            X: A `(b) x q x d`-dim Tensor of `(b)` t-batches with `q` `d`-dim
+                design points each; the decorator requests `q = 1`.
+
+        Returns:
+            A `(b)`-dim Tensor of the acquisition function at the given
+            design points `X`.
+        """
+        self.to(device=X.device)
+
+        discrete_posterior = self.model.posterior(X)
+        result = discrete_posterior.integrate(self.ag_integrate)
+        # remove q dimension
+        return result.squeeze(-1)
+
+    @abstractmethod
+    def ag_integrate(self, lower_bound: Tensor, upper_bound: Tensor) -> Tensor:
+        r"""Integrate the acquisition integrand over one posterior bucket.
+
+        The acquisition function is assumed to have the form
+        \int ag(y) * p(y) dy, and this method handles the `ag` part of that
+        integral between `lower_bound` and `upper_bound`. The `integrate`
+        method of the posterior (`BoundedRiemannPosterior`) is passed this
+        method and computes the final acquisition value from its results.
+
+        Args:
+            lower_bound: lower bound of integral
+            upper_bound: upper bound of integral
+
+        Returns:
+            A Tensor of results, one per pair of bounds.
+        """
+        pass  # pragma: no cover
+
+
+class DiscretizedExpectedImprovement(DiscretizedAcquistionFunction):
+    r"""DiscretizedExpectedImprovement is an acquisition function that
+    computes the expected improvement over the current best observed value
+    `best_f` (stored as a buffer) for a Riemann distribution.
+    """
+
+    def __init__(self, model: Model, best_f: Tensor) -> None:
+        super().__init__(model)
+        self.register_buffer("best_f", torch.as_tensor(best_f))
+
+    def ag_integrate(self, lower_bound: Tensor, upper_bound: Tensor) -> Tensor:
+        r"""Mean of ag(y) = (y - best_f).clamp(min=0) over each bucket.
+
+        ag is linear above best_f, so on the part of a bucket above best_f its
+        mean is the midpoint improvement. A bucket that contains best_f improves
+        only on the fraction (upper_bound - best_f) / (upper_bound - lower_bound)
+        of its width, so the midpoint improvement is weighted by that fraction.
+
+        Args:
+            lower_bound: lower bound of integral
+            upper_bound: upper bound of integral
+
+        Returns:
+            A Tensor with the mean improvement of each `[lower, upper]` bucket.
+        """
+        floor = torch.max(self.best_f, lower_bound)
+        midpoint_gain = ((upper_bound + floor) / 2 - self.best_f).clamp_min(0)
+        # Share of the bucket above best_f: 1 if best_f <= lower_bound, 0 if
+        # best_f >= upper_bound (assumes upper_bound > lower_bound).
+        share = (upper_bound - floor) / (upper_bound - lower_bound)
+        return midpoint_gain * share.clamp(0, 1)
+
+
+class DiscretizedProbabilityOfImprovement(DiscretizedAcquistionFunction):
+    r"""Acquisition function computing the probability of improvement over the
+    current best observed value `best_f` (kept as a module buffer), evaluated
+    on a Riemann distribution.
+    """
+
+    def __init__(self, model: Model, best_f: Tensor) -> None:
+        super().__init__(model)
+        incumbent = torch.as_tensor(best_f)
+        self.register_buffer("best_f", incumbent)
+
+    def ag_integrate(self, lower_bound: Tensor, upper_bound: Tensor) -> Tensor:
+        r"""Fraction of each bucket that lies above `best_f`.
+
+        PI is \int ag(y) * p(y) dy with ag(y) = I(y > best_f), I being the
+        indicator function. Within one bucket, ag is therefore 1 on the stretch
+        from best_f to upper_bound and 0 below it, so the bucket contributes the
+        ratio of that stretch to the bucket width. Clamping to [0, 1] covers
+        buckets entirely above best_f (ratio > 1) or entirely below (ratio < 0).
+
+        Args:
+            lower_bound: lower bound of integral
+            upper_bound: upper bound of integral
+
+        Returns:
+            A Tensor with, per bucket, the fraction of its width above `best_f`.
+        """
+        bucket_width = upper_bound - lower_bound
+        # Signed length of the stretch above best_f; it exceeds the bucket width or
+        # goes negative when best_f lies outside the bucket, hence the clamp.
+        headroom = upper_bound - self.best_f
+        return torch.clamp(headroom / bucket_width, min=0, max=1)
@@ -0,0 +1,96 @@
+#!/usr/bin/env python3
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+#
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+
+r"""
+This module defines the botorch model for PFNs (`PFNModel`). Helpers to download
+pretrained, public PFNs (`download_model`) and their model paths (`ModelPaths`)
+live in `botorch_community.models.utils.prior_fitted_network`.
+For the latter to work `pfns4bo` must be installed.
+"""
+
+from __future__ import annotations
+
+from typing import Optional, Union
+
+import torch.nn as nn
+
+from botorch.acquisition.objective import PosteriorTransform
+from botorch.exceptions.errors import UnsupportedError
+from botorch.models.model import Model
+
+from botorch_community.posteriors.riemann import BoundedRiemannPosterior
+from torch import Tensor
+
+
+class PFNModel(Model):
+    """Prior-data Fitted Network"""
+
+    def __init__(
+        self,
+        train_X: Tensor,
+        train_Y: Tensor,
+        model: nn.Module,
+    ) -> None:
+        """Initialize a PFNModel.
+
+        Args:
+            train_X: A `batch_shape x n x d` tensor of training features.
+            train_Y: A `batch_shape x n x m` tensor of training observations.
+            model: A PFN module that provides
+                - `forward(train_X, train_Y, X)`, returning logits of shape
+                  `n x b x c`, where `c` is the number of discrete buckets;
+                - `criterion.borders`, a `c+1`-dim tensor of bucket borders.
+                It is moved to the device and dtype of `train_X`.
+        """
+        super().__init__()
+        self.pfn = model.to(train_X)
+        self.train_X, self.train_Y = train_X, train_Y
+
+    def posterior(
+        self,
+        X: Tensor,
+        output_indices: Optional[list[int]] = None,
+        observation_noise: Union[bool, Tensor] = False,
+        posterior_transform: Optional[PosteriorTransform] = None,
+    ) -> BoundedRiemannPosterior:
+        r"""Computes the posterior over model outputs at the provided points.
+
+        The PFN is switched to eval mode and evaluated on the stored training
+        data together with `X`; a softmax over its logits gives the bucket
+        probabilities. No input transforms are applied by this method.
+
+        Args:
+            X: A `b x q x d`-dim Tensor, where `d` is the dimension of the
+                feature space, `q` is the number of points considered jointly,
+                and `b` is the batch dimension. Only `q = 1` is supported.
+            output_indices: **Currently not supported for PFNModel.**
+            observation_noise: **Currently not supported for PFNModel.**
+            posterior_transform: **Currently not supported for PFNModel.**
+
+        Returns:
+            A `BoundedRiemannPosterior` built from the bucket borders of the
+            PFN and the predicted bucket probabilities.
+        """
+        self.pfn.eval()
+        if output_indices is not None:
+            raise RuntimeError(
+                "output_indices is not None. PFNModel should not be a multi-output model."
+            )
+        if observation_noise:
+            raise UnsupportedError("observation_noise is not supported for PFNModel.")
+        if posterior_transform is not None:
+            raise UnsupportedError("posterior_transform is not supported for PFNModel.")
+
+        # add support for q > 1 later
+        if X.ndim > 2 and X.size(-2) > 1:
+            raise NotImplementedError("q must be 1 for PFNModel.")
+
+        # X is handed to the PFN as is, next to the stored training data
+        bucket_logits = self.pfn(self.train_X, self.train_Y, X)
+        bucket_probs = bucket_logits.softmax(dim=-1)
+
+        bucket_borders = self.pfn.criterion.borders
+        return BoundedRiemannPosterior(bucket_borders, bucket_probs)
@@ -0,0 +1,73 @@
+#!/usr/bin/env python3
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+#
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+
+import gzip
+import io
+import os
+from enum import Enum
+from typing import Optional
+
+import torch
+import torch.nn as nn
+
+
+class ModelPaths(Enum):
+    """URLs of the pretrained PFNs4BO checkpoints usable with `download_model`."""
+    # Member values must differ: members sharing a value become Enum aliases.
+    pfns4bo_hebo = "https://github.com/automl/PFNs4BO/raw/refs/heads/main/pfns4bo/final_models/model_hebo_morebudget_9_unused_features_3.pt.gz"
+    pfns4bo_bnn = "https://github.com/automl/PFNs4BO/raw/refs/heads/main/pfns4bo/final_models/model_sampled_warp_simple_mlp_for_hpob_46.pt.gz"
+    pfns4bo_hebo_userprior = "https://github.com/automl/PFNs4BO/raw/refs/heads/main/pfns4bo/final_models/hebo_morebudget_9_unused_features_3_userpriorperdim2_8.pt.gz"
+
+
+def download_model(
+    model_path: str | ModelPaths,
+    proxies: Optional[dict[str, str]] = None,
+    cache_dir: Optional[str] = None,
+) -> nn.Module:
+    """Download a gzipped PFN checkpoint, or load it from the local cache.
+
+    Args:
+        model_path: URL of the gzipped checkpoint, or a `ModelPaths` member.
+        proxies: Optional mapping from protocols, e.g. ``http``, to proxy addresses.
+        cache_dir: The cache dir to use; defaults to ``/tmp/botorch_pfn_models``.
+
+    Returns:
+        The object stored in the checkpoint, expected to be a PFN model.
+
+    Raises:
+        ImportError: If the `requests` library is not installed.
+        requests.HTTPError: If the download answers with an error status.
+    """
+    try:
+        import requests
+    except ImportError as err:
+        raise ImportError(
+            "The 'requests' library is not installed. "
+            "You can install it using `pip install requests`."
+        ) from err
+    if isinstance(model_path, ModelPaths):
+        model_path = model_path.value
+    if cache_dir is None:
+        cache_dir = "/tmp/botorch_pfn_models"
+    os.makedirs(cache_dir, exist_ok=True)
+    # The cache entry keeps the URL's file name but is written by `torch.save`.
+    cache_path = os.path.join(cache_dir, model_path.split("/")[-1])
+    if os.path.exists(cache_path):
+        model = torch.load(cache_path, map_location=torch.device("cpu"))
+        print("loaded from cache: ", cache_path)
+        return model
+
+    response = requests.get(model_path, proxies=proxies or None)
+    response.raise_for_status()
+    # NOTE(review): `torch.load` may unpickle the payload; only use trusted URLs.
+    with gzip.GzipFile(fileobj=io.BytesIO(response.content)) as gz:
+        model = torch.load(gz, map_location=torch.device("cpu"))
+    # Save via a temporary name + rename so a failed write cannot poison the cache.
+    partial_path = f"{cache_path}.{os.getpid()}.tmp"
+    torch.save(model, partial_path)
+    os.replace(partial_path, cache_path)
+    print("saved at: ", cache_path)
+    return model