Adding assign to load_state_dict implementations (meta-pytorch#3193)

Carl Hvarfner · meta-codesync[bot] · commit a4088cb62b0a · 2026-02-20T02:55:01.000-08:00
Summary: Pull Request resolved: meta-pytorch#3193. Pull Request resolved: meta-pytorch#3080. This commit adds an `assign` argument to `GPyTorchModel.load_state_dict` and to the `load_state_dict` overrides of the other model types, for consistency with `torch.nn.Module.load_state_dict`. Dependent on D87084496 (OSS here: https://github.com/cornellius-gp/gpytorch/pull/2691/commits). Reviewed By: saitcakmak. Differential Revision: D86870038. fbshipit-source-id: bbbcd33dc3edd991e963a4a2554054fe0bd3551d
diff --git a/botorch/models/fully_bayesian.py b/botorch/models/fully_bayesian.py
@@ -1111,7 +1111,10 @@ def median_lengthscale(self) -> Tensor:
         return lengthscale.median(0).values.squeeze(0)
 
     def load_state_dict(
-        self, state_dict: Mapping[str, Any], strict: bool = True
+        self,
+        state_dict: Mapping[str, Any],
+        strict: bool = True,
+        assign: bool = False,
     ) -> None:
         r"""Custom logic for loading the state dict.
 
@@ -1133,7 +1136,7 @@ def load_state_dict(
         )
         self.load_mcmc_samples(mcmc_samples=mcmc_samples)
         # Load the actual samples from the state dict
-        super().load_state_dict(state_dict=state_dict, strict=strict)
+        super().load_state_dict(state_dict=state_dict, strict=strict, assign=assign)
 
 
 class SaasFullyBayesianSingleTaskGP(FullyBayesianSingleTaskGP):
@@ -1184,7 +1187,10 @@ def median_weight_variance(self) -> Tensor:
         return weight_variance.median(0).values.squeeze(0)
 
     def load_state_dict(
-        self, state_dict: Mapping[str, Any], strict: bool = True
+        self,
+        state_dict: Mapping[str, Any],
+        strict: bool = True,
+        assign: bool = False,
     ) -> None:
         r"""Custom logic for loading the state dict.
 
@@ -1205,4 +1211,4 @@ def load_state_dict(
         )
         self.load_mcmc_samples(mcmc_samples=mcmc_samples)
         # Load the actual samples from the state dict
-        super().load_state_dict(state_dict=state_dict, strict=strict)
+        super().load_state_dict(state_dict=state_dict, strict=strict, assign=assign)
diff --git a/botorch/models/gpytorch.py b/botorch/models/gpytorch.py
@@ -329,6 +329,7 @@ def load_state_dict(
         state_dict: Mapping[str, Any],
         strict: bool = True,
         keep_transforms: bool = True,
+        assign: bool = False,
     ) -> None:
         r"""Load the model state.
 
@@ -338,9 +339,17 @@ def load_state_dict(
             keep_transforms: A boolean indicating whether to keep the input and outcome
                 transforms. Doing so is useful when loading a model that was trained on
                 a full set of data, and is later loaded with a subset of the data.
+            assign: When set to ``False``, the properties of the tensors in the current
+                module are preserved whereas setting it to ``True`` preserves
+                properties of the Tensors in the state dict. The only
+                exception is the ``requires_grad`` field of :class:`~torch.nn.Parameter`
+                for which the value from the module is preserved. Default: ``False``.
         """
+        if assign:
+            first_item = next(iter(state_dict.values()))
+            self.to(first_item)
         if not keep_transforms:
-            super().load_state_dict(state_dict, strict)
+            super().load_state_dict(state_dict=state_dict, strict=strict, assign=assign)
             return
 
         should_outcome_transform = (
@@ -369,10 +378,12 @@ def load_state_dict(
                         BotorchWarning,
                         stacklevel=3,
                     )
-                    super().load_state_dict(state_dict, strict)
+                    super().load_state_dict(
+                        state_dict=state_dict, strict=strict, assign=assign
+                    )
                     return
 
-        super().load_state_dict(state_dict, strict)
+        super().load_state_dict(state_dict=state_dict, strict=strict, assign=assign)
 
         if getattr(self, "input_transform", None) is not None:
             self.input_transform.eval()
@@ -764,8 +775,11 @@ def load_state_dict(
         self,
         state_dict: Mapping[str, Any],
         strict: bool = True,
+        assign: bool = False,
     ) -> None:
-        return ModelList.load_state_dict(self, state_dict, strict)
+        return ModelList.load_state_dict(
+            self, state_dict=state_dict, strict=strict, assign=assign
+        )
 
     # pyre-fixme[14]: Inconsistent override in return types
     def posterior(
diff --git a/botorch/models/model.py b/botorch/models/model.py
@@ -582,6 +582,7 @@ def load_state_dict(
         state_dict: Mapping[str, Any],
         strict: bool = True,
         keep_transforms: bool = True,
+        assign: bool = False,
     ) -> None:
         """Initialize the fully Bayesian models before loading the state dict."""
         for i, m in enumerate(self.models):
@@ -590,7 +591,7 @@ def load_state_dict(
                 for k, v in state_dict.items()
                 if k.startswith(f"models.{i}.")
             }
-            m.load_state_dict(filtered_dict, strict=strict)
+            m.load_state_dict(filtered_dict, strict=strict, assign=assign)
 
     def fantasize(
         self,
diff --git a/test/models/test_gpytorch.py b/test/models/test_gpytorch.py
@@ -44,6 +44,7 @@
 from gpytorch.likelihoods import GaussianLikelihood
 from gpytorch.means import ConstantMean
 from gpytorch.models import ExactGP, IndependentModelList
+from gpytorch.priors import LogNormalPrior
 from gpytorch.settings import trace_mode
 from torch import Tensor
 from torch.nn.functional import one_hot
@@ -1042,6 +1043,104 @@ def test_load_state_dict_with_transforms(self):
                     )
                 )
 
+    def test_load_state_dict_assign_parameter(self):
+        """Test that the assign parameter correctly controls tensor property
+        preservation.
+
+        With assign=False (default): properties of the current model's tensors are
+        preserved.
+        With assign=True: properties of the state dict's tensors are preserved.
+        """
+        # Create base model with double precision
+        tkwargs_double = {"device": self.device, "dtype": torch.double}
+        train_X_double = torch.rand(5, 2, **tkwargs_double)
+        train_Y_double = torch.sin(train_X_double).sum(dim=1, keepdim=True)
+
+        # NOTE Due to issues with transformed priors in gpytorch, we refrain from
+        # instantiating a model with a LogNormal prior here.
+        model_specs_without_priors = {
+            "covar_module": RBFKernel(ard_num_dims=2),
+            "likelihood": GaussianLikelihood(),
+        }
+        base_model = SingleTaskGP(
+            train_X=train_X_double,
+            train_Y=train_Y_double,
+            **model_specs_without_priors,
+            **_get_input_output_transform(d=2, indices=[0, 1], m=1),
+        )
+        state_dict_double = base_model.state_dict()
+
+        # Create a new model with float32 precision (different dtype)
+        tkwargs_float = {"device": self.device, "dtype": torch.float}
+        train_X_float = torch.rand(5, 2, **tkwargs_float)
+        train_Y_float = torch.sin(train_X_float).sum(dim=1, keepdim=True)
+
+        # Test assign=False (default behavior)
+        model_assign_false = SingleTaskGP(
+            train_X=train_X_float,
+            train_Y=train_Y_float,
+            **model_specs_without_priors,
+            **_get_input_output_transform(d=2, indices=[0, 1], m=1),
+        )
+
+        # Load double precision state dict with assign=False
+        model_assign_false.load_state_dict(
+            state_dict_double, keep_transforms=False, assign=False
+        )
+
+        # With assign=False, the model should keep its original float32 dtype
+        self.assertEqual(model_assign_false.train_inputs[0].dtype, torch.float)
+
+        # Test assign=True
+        model_assign_true = SingleTaskGP(
+            train_X=train_X_float,
+            train_Y=train_Y_float,
+            **model_specs_without_priors,
+            **_get_input_output_transform(d=2, indices=[0, 1], m=1),
+        )
+
+        # Load double precision state dict with assign=True
+        model_assign_true.load_state_dict(
+            state_dict_double, keep_transforms=False, assign=True
+        )
+
+        # With assign=True, the model should adopt the state dict's double dtype
+        self.assertEqual(model_assign_true.train_inputs[0].dtype, torch.double)
+        self.assertEqual(
+            model_assign_true.train_inputs[0].dtype,
+            next(iter(state_dict_double.values())).dtype,
+        )
+
+        # Verify the two models have different dtypes
+        self.assertNotEqual(
+            model_assign_false.train_inputs[0].dtype,
+            model_assign_true.train_inputs[0].dtype,
+        )
+
+        base_model_with_prior = SingleTaskGP(
+            train_X=train_X_double,
+            train_Y=train_Y_double,
+            **_get_input_output_transform(d=2, indices=[0, 1], m=1),
+        )
+        state_dict_with_prior = base_model_with_prior.state_dict()
+        state_dict_double = base_model.state_dict()
+        model_assign_true_with_prior = SingleTaskGP(
+            train_X=train_X_float,
+            train_Y=train_Y_float,
+            covar_module=RBFKernel(
+                ard_num_dims=2, lengthscale_prior=LogNormalPrior(1.23, 2.34)
+            ),
+            **_get_input_output_transform(d=2, indices=[0, 1], m=1),
+        )
+
+        model_assign_true_with_prior.load_state_dict(
+            state_dict_with_prior, keep_transforms=False, assign=True
+        )
+        self.assertAlmostEqual(
+            model_assign_true_with_prior.covar_module.lengthscale_prior.loc,
+            base_model_with_prior.covar_module.lengthscale_prior.loc,
+        )
+
     def test_load_state_dict_no_transforms(self):
         tkwargs = {"device": self.device, "dtype": torch.double}