alan-turing-institute
diff --git a/autoemulate/experimental/data/preprocessors.py b/autoemulate/experimental/data/preprocessors.py
+2 -2
diff --git a/autoemulate/experimental/emulators/base.py b/autoemulate/experimental/emulators/base.py
+11 -11
diff --git a/autoemulate/experimental/emulators/gaussian_process/exact.py b/autoemulate/experimental/emulators/gaussian_process/exact.py
+18 -27
diff --git a/autoemulate/experimental/emulators/lightgbm.py b/autoemulate/experimental/emulators/lightgbm.py
+18 -18
diff --git a/autoemulate/experimental/emulators/neural_processes/conditional_neural_process.py b/autoemulate/experimental/emulators/neural_processes/conditional_neural_process.py
+12 -14
@@ -1,15 +1,15 @@
 from abc import ABC, abstractmethod
 
 import torch
-from autoemulate.experimental.types import InputLike
+from autoemulate.experimental.types import TensorLike
 
 
 class Preprocessor(ABC):
     @abstractmethod
     def __init__(*args, **kwargs): ...
 
     @abstractmethod
-    def preprocess(self, x: InputLike) -> InputLike: ...
+    def preprocess(self, x: TensorLike) -> TensorLike: ...
 
 
 class Standardizer(Preprocessor):
 
@@ -6,7 +6,7 @@
 from autoemulate.experimental.data.preprocessors import Preprocessor
 from autoemulate.experimental.data.utils import InputTypeMixin
 from autoemulate.experimental.data.validation import ValidationMixin
-from autoemulate.experimental.types import InputLike, OutputLike, TuneConfig
+from autoemulate.experimental.types import OutputLike, TensorLike, TuneConfig
 
 
 class Emulator(ABC, ValidationMixin):
@@ -17,26 +17,26 @@ class Emulator(ABC, ValidationMixin):
     """
 
     @abstractmethod
-    def _fit(self, x: InputLike, y: InputLike | None): ...
+    def _fit(self, x: TensorLike, y: TensorLike): ...
 
-    def fit(self, x: InputLike, y: InputLike | None):
+    def fit(self, x: TensorLike, y: TensorLike):
         self._check(x, y)
         self._fit(x, y)
 
     @abstractmethod
     def __init__(
-        self, x: InputLike | None = None, y: InputLike | None = None, **kwargs
+        self, x: TensorLike | None = None, y: TensorLike | None = None, **kwargs
     ): ...
 
     @classmethod
     def model_name(cls) -> str:
         return cls.__name__
 
     @abstractmethod
-    def _predict(self, x: InputLike) -> OutputLike:
+    def _predict(self, x: TensorLike) -> OutputLike:
         pass
 
-    def predict(self, x: InputLike) -> OutputLike:
+    def predict(self, x: TensorLike) -> OutputLike:
         self._check(x, None)
         output = self._predict(x)
         self._check_output(output)
@@ -93,7 +93,7 @@ class PyTorchBackend(nn.Module, Emulator, InputTypeMixin, Preprocessor):
     loss_fn: nn.Module = nn.MSELoss()
     optimizer: optim.Optimizer
 
-    def preprocess(self, x):
+    def preprocess(self, x: TensorLike) -> TensorLike:
         if self.preprocessor is None:
             return x
         return self.preprocessor.preprocess(x)
@@ -107,15 +107,15 @@ def loss_func(self, y_pred, y_true):
 
     def _fit(
         self,
-        x: InputLike,
-        y: InputLike | None,
+        x: TensorLike,
+        y: TensorLike,
     ):
         """
         Train a PyTorchBackend model.
 
         Parameters
         ----------
-            X: InputLike
+            x: TensorLike
                 Input features as numpy array, PyTorch tensor, or DataLoader.
             y: OutputLike or None
                 Target values (not needed if x is a DataLoader).
@@ -160,7 +160,7 @@ def _fit(
             if self.verbose and (epoch + 1) % (self.epochs // 10 or 1) == 0:
                 print(f"Epoch [{epoch + 1}/{self.epochs}], Loss: {avg_epoch_loss:.4f}")
 
-    def _predict(self, x: InputLike) -> OutputLike:
+    def _predict(self, x: TensorLike) -> OutputLike:
         self.eval()
         x = self.preprocess(x)
         return self(x)
 
@@ -5,9 +5,7 @@
 import torch
 from gpytorch import ExactMarginalLogLikelihood
 from gpytorch.distributions import MultitaskMultivariateNormal, MultivariateNormal
-from gpytorch.kernels import (
-    ScaleKernel,
-)
+from gpytorch.kernels import ScaleKernel
 from gpytorch.likelihoods import MultitaskGaussianLikelihood
 from torch import nn
 
@@ -26,15 +24,12 @@
     zero_mean,
 )
 from autoemulate.experimental.data.preprocessors import Preprocessor, Standardizer
-from autoemulate.experimental.emulators.base import (
-    Emulator,
-    InputTypeMixin,
-)
+from autoemulate.experimental.emulators.base import Emulator, InputTypeMixin
 from autoemulate.experimental.emulators.gaussian_process import (
     CovarModuleFn,
     MeanModuleFn,
 )
-from autoemulate.experimental.types import InputLike, OutputLike
+from autoemulate.experimental.types import OutputLike, TensorLike
 from autoemulate.utils import set_random_seed
 
 
@@ -53,8 +48,8 @@ class GaussianProcessExact(
 
     def __init__(  # noqa: PLR0913 allow too many arguments since all currently required
         self,
-        x: InputLike,
-        y: InputLike,
+        x: TensorLike,
+        y: TensorLike,
         likelihood_cls: type[MultitaskGaussianLikelihood] = MultitaskGaussianLikelihood,
         mean_module_fn: MeanModuleFn = constant_mean,
         covar_module_fn: CovarModuleFn = rbf,
@@ -68,6 +63,7 @@ def __init__(  # noqa: PLR0913 allow too many arguments since all currently requ
         if random_state is not None:
             set_random_seed(random_state)
 
+        # TODO (#422): update the call here to check or call e.g. `_ensure_2d`
         x, y = self._convert_to_tensors(x, y)
 
         # Initialize the mean and covariance modules
@@ -85,8 +81,6 @@ def __init__(  # noqa: PLR0913 allow too many arguments since all currently requ
             )
         )
 
-        assert isinstance(y, torch.Tensor)
-        assert isinstance(x, torch.Tensor)
         self.n_features_in_ = x.shape[1]
         self.n_outputs_ = y.shape[1] if y.ndim > 1 else 1
 
@@ -108,7 +102,6 @@ def __init__(  # noqa: PLR0913 allow too many arguments since all currently requ
 
         # Init must be called with preprocessed data
         x_preprocessed = self.preprocess(x)
-        assert isinstance(x_preprocessed, torch.Tensor)
         gpytorch.models.ExactGP.__init__(
             self,
             train_inputs=x_preprocessed,
@@ -127,24 +120,21 @@ def __init__(  # noqa: PLR0913 allow too many arguments since all currently requ
     def is_multioutput():
         return True
 
-    def preprocess(self, x: InputLike) -> InputLike:
+    def preprocess(self, x: TensorLike) -> TensorLike:
         """Preprocess the input data using the preprocessor."""
         if self.preprocessor is not None:
             x = self.preprocessor.preprocess(x)
         return x
 
-    def forward(self, x: InputLike):
-        assert isinstance(x, torch.Tensor)
+    def forward(self, x: TensorLike):
         mean = self.mean_module(x)
-
         assert isinstance(mean, torch.Tensor)
         covar = self.covar_module(x)
-
         return MultitaskMultivariateNormal.from_batch_mvn(
             MultivariateNormal(mean, covar)
         )
 
-    def log_epoch(self, epoch: int, loss: torch.Tensor):
+    def log_epoch(self, epoch: int, loss: TensorLike):
         logger = logging.getLogger(__name__)
         assert self.likelihood.noise is not None
         msg = (
@@ -153,15 +143,16 @@ def log_epoch(self, epoch: int, loss: torch.Tensor):
         )
         logger.info(msg)
 
-    def _fit(self, x: InputLike, y: InputLike | None):
+    def _fit(self, x: TensorLike, y: TensorLike):
         self.train()
         self.likelihood.train()
-        # Ensure tensors and correct shapes
-        x, y = self._convert_to_tensors(self._convert_to_dataset(x, y))
+
+        # TODO: move conversion out of _fit() and instead rely on the validation check
+        x, y = self._convert_to_tensors(x, y)
+
         optimizer = torch.optim.Adam(self.parameters(), lr=self.lr)
         mll = ExactMarginalLogLikelihood(self.likelihood, self)
         x = self.preprocess(x)
-        assert isinstance(x, torch.Tensor)
 
         # Set the training data in case changed since init
         self.set_train_data(x, y, strict=False)
@@ -176,14 +167,14 @@ def _fit(self, x: InputLike, y: InputLike | None):
             self.log_epoch(epoch, loss)
             optimizer.step()
 
-    def _predict(self, x: InputLike) -> OutputLike:
+    def _predict(self, x: TensorLike) -> OutputLike:
         self.eval()
-        x = self.preprocess(x)
-        x_tensor = self._convert_to_tensors(x)
+        # TODO: remove upon implementing validation
         if not isinstance(x, torch.Tensor):
             msg = f"x ({x}) must be a torch.Tensor"
             raise ValueError(msg)
-        return self(x_tensor)
+        x = self.preprocess(x)
+        return self(x)
 
     @staticmethod
     def get_tune_config():
 
@@ -3,11 +3,8 @@
 from sklearn.utils.validation import check_array, check_is_fitted, check_X_y
 from torch import Tensor
 
-from autoemulate.experimental.emulators.base import (
-    Emulator,
-    InputTypeMixin,
-)
-from autoemulate.experimental.types import InputLike, OutputLike
+from autoemulate.experimental.emulators.base import Emulator, InputTypeMixin
+from autoemulate.experimental.types import OutputLike, TensorLike
 
 
 class LightGBM(Emulator, InputTypeMixin):
@@ -20,8 +17,8 @@ class LightGBM(Emulator, InputTypeMixin):
 
     def __init__(  # noqa: PLR0913 allow too many arguments since all currently required
         self,
-        x: InputLike | None = None,
-        y: InputLike | None = None,
+        x: TensorLike | None = None,
+        y: TensorLike | None = None,
         boosting_type: str = "gbdt",
         num_leaves: int = 31,
         max_depth: int = -1,
@@ -68,28 +65,30 @@ def __init__(  # noqa: PLR0913 allow too many arguments since all currently requ
     def is_multioutput() -> bool:
         return False
 
-    def _fit(self, x: InputLike, y: InputLike | None):
+    def _fit(self, x: TensorLike, y: TensorLike):
         """
         Fits the emulator to the data.
         The model expects the input data to be:
             x (features): 2D array
             y (target): 1D array
         """
 
-        x, y = self._convert_to_numpy(x, y)
+        x_np, y_np = self._convert_to_numpy(x, y)
 
-        if y is None:
+        # TODO (#422): move to validation
+        if y_np is None:
             msg = "y must be provided."
             raise ValueError(msg)
-        if y.ndim > 2:
-            msg = f"y must be 1D or 2D array. Found {y.ndim}D array."
+        if y_np.ndim > 2:
+            msg = f"y must be 1D or 2D array. Found {y_np.ndim}D array."
             raise ValueError(msg)
-        if y.ndim == 2:  # _convert_to_numpy may return 2D y
-            y = y.ravel()  # Ensure y is 1-dimensional
+        if y_np.ndim == 2:  # _convert_to_numpy may return 2D y
+            y_np = y_np.ravel()  # Ensure y is 1-dimensional
 
-        self.n_features_in_ = x.shape[1]
+        self.n_features_in_ = x_np.shape[1]
 
-        x, y = check_X_y(x, y, y_numeric=True)
+        # TODO (#422): move to validation
+        x_np, y_np = check_X_y(x_np, y_np, y_numeric=True)
 
         self.model_ = LGBMRegressor(
             boosting_type=self.boosting_type,
@@ -113,12 +112,13 @@ def _fit(self, x: InputLike, y: InputLike | None):
             verbose=self.verbose,
         )
 
-        self.model_.fit(x, y)
+        self.model_.fit(x_np, y_np)
         self.is_fitted_ = True
 
-    def _predict(self, x: InputLike) -> OutputLike:
+    def _predict(self, x: TensorLike) -> OutputLike:
         """Predicts the output of the emulator for a given input."""
         x = check_array(x)
+        # TODO (#422): move to predict() and consider if required
         check_is_fitted(self, "is_fitted_")
         y_pred = self.model_.predict(x)
         # Ensure the output is a 2D tensor array with shape (n_samples, 1)
 
@@ -3,7 +3,7 @@
 import torch.utils
 import torch.utils.data
 from autoemulate.experimental.emulators.base import PyTorchBackend
-from autoemulate.experimental.types import DistributionLike, InputLike, TensorLike
+from autoemulate.experimental.types import DistributionLike, TensorLike
 from torch import nn
 from torch.utils.data import Dataset
 
@@ -246,8 +246,8 @@ class CNPModule(PyTorchBackend):
 
     def __init__(  # noqa: PLR0913
         self,
-        x: InputLike,
-        y: InputLike,
+        x: TensorLike,
+        y: TensorLike,
         hidden_dim: int = 32,
         latent_dim: int = 16,
         hidden_layers_enc: int = 2,
@@ -287,9 +287,10 @@ def __init__(  # noqa: PLR0913
             Batch size for training.
         """
         super().__init__()
-        x_, y_ = self._convert_to_tensors(x, y)
-        self.input_dim = x_.shape[1]
-        self.output_dim = y_.shape[1]
+        # TODO (#422): update the call here to check or call e.g. `_ensure_2d`
+        x, y = self._convert_to_tensors(x, y)
+        self.input_dim = x.shape[1]
+        self.output_dim = y.shape[1]
         self.encoder = Encoder(
             self.input_dim,
             self.output_dim,
@@ -347,11 +348,7 @@ def forward(
             reinterpreted_batch_ndims=1,
         )
 
-    def _fit(
-        self,
-        x: InputLike,
-        y: InputLike | None,
-    ):
+    def _fit(self, x: TensorLike, y: TensorLike):
         """
         Fit the model to the data.
         Note the batching of data is done internally in the method.
@@ -364,8 +361,8 @@ def _fit(
         """
         self.train()
 
-        # TODO: revisit as part of https://github.com/alan-turing-institute/autoemulate/issues/400
         # Save off all X_train and y_train
+        # TODO (#422): update the call here to check or call e.g. `_ensure_2d`
         self.x_train, self.y_train = self._convert_to_tensors(x, y)
 
         # Convert dataset to CNP Dataset
@@ -415,7 +412,7 @@ def _fit(
             if self.verbose and (epoch + 1) % (self.epochs // 10 or 1) == 0:
                 print(f"Epoch [{epoch + 1}/{self.epochs}], Loss: {avg_epoch_loss:.4f}")
 
-    def _predict(self, x: InputLike) -> DistributionLike:
+    def _predict(self, x: TensorLike) -> DistributionLike:
         """
         Predict uses the training data as the context data and the input x as the target
         data. The data is preprocessed within the method.
@@ -432,14 +429,15 @@ def _predict(self, x: InputLike) -> DistributionLike:
             Note the distribution is a single tensor of shape (n_points, output_dim).
 
         """
+        # TODO: add to validation _check
         if self.x_train is None or self.y_train is None:
             msg = "Model has not been trained. Please call fit() before predict()."
             raise ValueError(msg)
 
         self.eval()
         x = self.preprocess(x)
 
-        # Convert x to a dataset
+        # TODO: add to validation _check
         x_target = self._convert_to_tensors(x)
 
         # Sort splitting into context and target