Support observation noise in Log outcome transform (#3245)

saitcakmak · meta-codesync[bot] · commit 1d7bbdb84963 · 2026-03-24T12:55:26.000-07:00
Summary: Removes the NotImplementedError that was raised when Yvar is provided to the Log transform. Observation noise is now transformed using the delta method approximation: Yvar_tf = Yvar / Y^2 in forward, and Yvar = Yvar_tf * exp(2 * Y_tf) in untransform. Documents that this assumes Gaussian noise in log-space (i.e., log-normal noise in the original space).

Closes #2623
Pull Request resolved: #3245
Reviewed By: Balandat
Differential Revision: D97353503
Pulled By: saitcakmak
fbshipit-source-id: 971317aa197d2c2872c553ed89be30e478f3b26e
diff --git a/botorch/models/transforms/outcome.py b/botorch/models/transforms/outcome.py
@@ -22,6 +22,7 @@
 
 from __future__ import annotations
 
+import logging
 import warnings
 from abc import ABC, abstractmethod
 from collections import OrderedDict
@@ -39,6 +40,8 @@
 from torch import Tensor
 from torch.nn import Module, ModuleDict
 
+logger: logging.Logger = logging.getLogger(__name__)
+
 
 class OutcomeTransform(Module, ABC):
     """Abstract base class for outcome transforms."""
@@ -726,6 +729,11 @@ class Log(OutcomeTransform):
     Useful if the targets are modeled using a (multivariate) log-Normal
     distribution. This means that we can use a standard GP model on the
     log-transformed outcomes and un-transform the model posterior of that GP.
+
+    When observation noise is provided, the variance is transformed using the
+    delta method approximation: Var[log(Y)] ≈ Var[Y] / Y^2. This assumes that
+    the observation noise is Gaussian in the log-transformed space, which
+    corresponds to log-normal observation noise in the original space.
     """
 
     def __init__(self, outputs: list[int] | None = None) -> None:
@@ -789,10 +797,18 @@ def forward(
                 dim=-1,
             )
         if Yvar is not None:
-            # TODO: Delta method, possibly issue warning
-            raise NotImplementedError(
-                "Log does not yet support transforming observation noise"
-            )
+            # Delta method: Var[log(Y)] ≈ Var[Y] / Y^2
+            Yvar_tf = Yvar / Y.clamp(min=1e-8).pow(2)
+            if outputs is not None:
+                Yvar = torch.stack(
+                    [
+                        Yvar_tf[..., i] if i in outputs else Yvar[..., i]
+                        for i in range(Y.size(-1))
+                    ],
+                    dim=-1,
+                )
+            else:
+                Yvar = Yvar_tf
         return Y_tf, Yvar
 
     def untransform(
@@ -825,10 +841,24 @@ def untransform(
                 dim=-1,
             )
         if Yvar is not None:
-            # TODO: Delta method, possibly issue warning
-            raise NotImplementedError(
-                "Log does not yet support transforming observation noise"
+            # Reverse of delta method: Var[Y] ≈ Var[log(Y)] * Y^2
+            # Since Y = exp(Y_log), this is Var[log(Y)] * exp(2 * Y_log)
+            logger.debug(
+                "Log.untransform: Reverse delta method for observation noise "
+                "is a lossy operation. The untransformed variance is an "
+                "approximation that may not exactly match the original variance."
             )
+            Yvar_utf = Yvar * torch.exp(2.0 * Y)
+            if outputs is not None:
+                Yvar = torch.stack(
+                    [
+                        Yvar_utf[..., i] if i in outputs else Yvar[..., i]
+                        for i in range(Y.size(-1))
+                    ],
+                    dim=-1,
+                )
+            else:
+                Yvar = Yvar_utf
         return Y_utf, Yvar
 
     def untransform_posterior(
diff --git a/test/models/transforms/test_outcome.py b/test/models/transforms/test_outcome.py
@@ -597,17 +597,23 @@ def test_log(self):
             self.assertTrue(torch.equal(Y_tf[..., [0]], Y_tf_subset))
             self.assertIsNone(Yvar_tf_subset)
 
-            # test error if observation noise present
+            # test with observation noise (delta method)
             tf = Log()
-            Y = torch.rand(*batch_shape, 3, m, device=self.device, dtype=dtype)
+            Y = 1e-2 + torch.rand(*batch_shape, 3, m, device=self.device, dtype=dtype)
             Yvar = 1e-8 + torch.rand(
                 *batch_shape, 3, m, device=self.device, dtype=dtype
             )
-            with self.assertRaises(NotImplementedError):
-                tf(Y, Yvar)
+            Y_tf, Yvar_tf = tf(Y, Yvar)
+            self.assertTrue(tf.training)
+            self.assertAllClose(Y_tf, torch.log(Y))
+            # Delta method: Var[log(Y)] ≈ Var[Y] / Y^2
+            self.assertAllClose(Yvar_tf, Yvar / Y.pow(2))
             tf.eval()
-            with self.assertRaises(NotImplementedError):
-                tf.untransform(Y, Yvar)
+            self.assertFalse(tf.training)
+            Y_utf, Yvar_utf = tf.untransform(Y_tf, Yvar_tf)
+            self.assertAllClose(Y_utf, Y)
+            # Reverse: Var[Y] = Var[log(Y)] * exp(2 * log(Y)) = Var[log(Y)] * Y^2
+            self.assertAllClose(Yvar_utf, Yvar)
 
             # untransform_posterior
             tf = Log()
@@ -661,14 +667,22 @@ def test_log(self):
             with self.assertRaises(NotImplementedError):
                 tf_subset = tf.subset_output(idcs=[0])
 
-            # with observation noise
+            # with observation noise (subset of outputs)
             tf = Log(outputs=outputs)
-            Y = torch.rand(*batch_shape, 3, m, device=self.device, dtype=dtype)
+            Y = 1e-2 + torch.rand(*batch_shape, 3, m, device=self.device, dtype=dtype)
             Yvar = 1e-8 + torch.rand(
                 *batch_shape, 3, m, device=self.device, dtype=dtype
             )
-            with self.assertRaises(NotImplementedError):
-                tf(Y, Yvar)
+            Y_tf, Yvar_tf = tf(Y, Yvar)
+            # output 0 should be untransformed, output 1 should be transformed
+            self.assertAllClose(Y_tf[..., 0], Y[..., 0])
+            self.assertAllClose(Y_tf[..., 1], torch.log(Y[..., 1]))
+            self.assertAllClose(Yvar_tf[..., 0], Yvar[..., 0])
+            self.assertAllClose(Yvar_tf[..., 1], Yvar[..., 1] / Y[..., 1].pow(2))
+            tf.eval()
+            Y_utf, Yvar_utf = tf.untransform(Y_tf, Yvar_tf)
+            self.assertAllClose(Y_utf, Y)
+            self.assertAllClose(Yvar_utf, Yvar)
 
             # error on untransform_posterior
             with self.assertRaises(NotImplementedError):
@@ -722,13 +736,17 @@ def test_chained_outcome_transform(self):
             with self.assertRaises(RuntimeError):
                 tf.subset_output(idcs=[0, 1, 2])
 
-            # test error if observation noise present
-            Y = torch.rand(*batch_shape, 3, m, device=self.device, dtype=dtype)
+            # test observation noise is propagated through chained transform
+            Y = 1e-2 + torch.rand(*batch_shape, 3, m, device=self.device, dtype=dtype)
             Yvar = 1e-8 + torch.rand(
                 *batch_shape, 3, m, device=self.device, dtype=dtype
             )
-            with self.assertRaises(NotImplementedError):
-                tf(Y, Yvar)
+            tf1 = Log()
+            tf2 = Standardize(m=m, batch_shape=batch_shape)
+            tf = ChainedOutcomeTransform(log=tf1, standardize=tf2)
+            Y_tf, Yvar_tf = tf(Y, Yvar)
+            self.assertEqual(Y_tf.shape, Y.shape)
+            self.assertEqual(Yvar_tf.shape, Yvar.shape)
 
             # untransform_posterior
             tf1 = Log()
@@ -781,15 +799,19 @@ def test_chained_outcome_transform(self):
             torch.allclose(Y_utf, Y)
             self.assertIsNone(Yvar_utf)
 
-            # with observation noise
-            Y = torch.rand(*batch_shape, 3, m, device=self.device, dtype=dtype)
+            # with observation noise (subset outputs)
+            Y = 1e-2 + torch.rand(*batch_shape, 3, m, device=self.device, dtype=dtype)
             Yvar = 1e-8 + torch.rand(
                 *batch_shape, 3, m, device=self.device, dtype=dtype
             )
-            with self.assertRaises(NotImplementedError):
-                tf(Y, Yvar)
+            tf1 = Log(outputs=outputs)
+            tf2 = Standardize(m=m, outputs=outputs, batch_shape=batch_shape)
+            tf = ChainedOutcomeTransform(log=tf1, standardize=tf2)
+            Y_tf, Yvar_tf = tf(Y, Yvar)
+            self.assertEqual(Y_tf.shape, Y.shape)
+            self.assertEqual(Yvar_tf.shape, Yvar.shape)
 
-            # error on untransform_posterior
+            # error on untransform_posterior (subset outputs not supported)
             with self.assertRaises(NotImplementedError):
                 tf.untransform_posterior(None)