Skip to content

Commit 3706802

Browse files

Merge pull request #257 from alan-turing-institute/optimise ("Optimise"),
authored via GitHub.
2 parents: 983a012 + b68c497 — merge commit 3706802

17 files changed: +66 additions, -128 deletions (lines changed)

autoemulate/compare.py

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,9 @@
1-
import matplotlib.pyplot as plt
21
import numpy as np
32
import pandas as pd
43
from sklearn.base import BaseEstimator
54
from sklearn.decomposition import PCA
6-
from sklearn.metrics import make_scorer
7-
from sklearn.model_selection import cross_validate
85
from sklearn.model_selection import KFold
9-
from sklearn.model_selection import PredefinedSplit
10-
from sklearn.model_selection import train_test_split
11-
from sklearn.pipeline import Pipeline
126
from sklearn.preprocessing import StandardScaler
13-
from sklearn.utils.validation import check_is_fitted
147
from sklearn.utils.validation import check_X_y
158
from tqdm.autonotebook import tqdm
169

@@ -27,7 +20,6 @@
2720
from autoemulate.plotting import _plot_model
2821
from autoemulate.printing import _print_setup
2922
from autoemulate.save import ModelSerialiser
30-
from autoemulate.utils import _ensure_2d
3123
from autoemulate.utils import _get_full_model_name
3224
from autoemulate.utils import _redirect_warnings
3325
from autoemulate.utils import get_model_name

autoemulate/cross_validate.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,6 @@
55
import pandas as pd
66
from sklearn.metrics import make_scorer
77
from sklearn.model_selection import cross_validate
8-
from sklearn.model_selection import PredefinedSplit
9-
from sklearn.model_selection import train_test_split
108

119
from autoemulate.utils import get_model_name
1210
from autoemulate.utils import get_model_params

autoemulate/emulators/conditional_neural_process.py

Lines changed: 30 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,6 @@
99
from sklearn.utils.validation import check_array
1010
from sklearn.utils.validation import check_is_fitted
1111
from sklearn.utils.validation import check_X_y
12-
from skopt.space import Real
1312
from skorch import NeuralNetRegressor
1413
from skorch.callbacks import EarlyStopping
1514
from skorch.callbacks import GradientNormClipping
@@ -42,8 +41,10 @@ class ConditionalNeuralProcess(RegressorMixin, BaseEstimator):
4241
The number of hidden units in the neural network layers.
4342
latent_dim : int, default=64
4443
The dimensionality of the latent space.
45-
hidden_layers : int, default=3
46-
The number of hidden layers in the neural network.
44+
hidden_layers_enc : int, default=3
45+
The number of hidden layers in the encoder.
46+
hidden_layers_dec : int, default=3
47+
The number of hidden layers in the decoder.
4748
min_context_points : int, default=3
4849
The minimum number of context points to use during training.
4950
max_context_points : int, default=10
@@ -108,26 +109,28 @@ def __init__(
108109
# architecture
109110
hidden_dim=64,
110111
latent_dim=64,
111-
hidden_layers=3,
112+
hidden_layers_enc=3,
113+
hidden_layers_dec=3,
112114
# data per episode
113115
min_context_points=3,
114116
max_context_points=10,
115117
n_episode=32,
116118
# training
117119
max_epochs=100,
118-
lr=1e-2,
120+
lr=5e-3,
119121
batch_size=16,
120122
activation=nn.ReLU,
121123
optimizer=torch.optim.AdamW,
122124
normalize_y=True,
123125
# misc
124-
device=None,
126+
device="cpu",
125127
random_state=None,
126128
attention=False,
127129
):
128130
self.hidden_dim = hidden_dim
129131
self.latent_dim = latent_dim
130-
self.hidden_layers = hidden_layers
132+
self.hidden_layers_enc = hidden_layers_enc
133+
self.hidden_layers_dec = hidden_layers_dec
131134
self.min_context_points = min_context_points
132135
self.max_context_points = max_context_points
133136
self.n_episode = n_episode
@@ -184,7 +187,8 @@ def fit(self, X, y):
184187
module__output_dim=self.output_dim_,
185188
module__hidden_dim=self.hidden_dim,
186189
module__latent_dim=self.latent_dim,
187-
module__hidden_layers=self.hidden_layers,
190+
module__hidden_layers_enc=self.hidden_layers_enc,
191+
module__hidden_layers_dec=self.hidden_layers_dec,
188192
module__activation=self.activation,
189193
dataset__min_context_points=self.min_context_points,
190194
dataset__max_context_points=self.max_context_points,
@@ -193,11 +197,7 @@ def fit(self, X, y):
193197
lr=self.lr,
194198
batch_size=self.batch_size,
195199
optimizer=self.optimizer,
196-
device=self.device
197-
if self.device is not None
198-
else "cuda"
199-
if torch.cuda.is_available()
200-
else "cpu",
200+
device=self.device,
201201
dataset=CNPDataset, # special dataset to sample context and target sets
202202
criterion=CNPLoss,
203203
iterator_train__collate_fn=cnp_collate_fn, # special collate to different n in episodes
@@ -260,31 +260,30 @@ def predict(self, X, return_std=False):
260260
def get_grid_params(search_type: str = "random"):
261261
param_space = {
262262
"max_epochs": [100, 200, 300],
263-
"batch_size": [16, 32, 64],
263+
"batch_size": [16, 32],
264264
"hidden_dim": [32, 64, 128],
265265
"latent_dim": [32, 64, 128],
266-
"max_context_points": [10, 20, 30],
267-
"hidden_layers": [1, 2, 3, 4, 5],
266+
"max_context_points": [5, 10, 15],
267+
"hidden_layers_enc": [2, 3, 4],
268+
"hidden_layers_dec": [2, 3, 4],
268269
"activation": [
269270
nn.ReLU,
270-
# nn.Tanh,
271271
nn.GELU,
272-
# nn.Sigmoid,
273272
],
274-
# ],
275-
"optimizer": [torch.optim.AdamW, torch.optim.SGD], #
273+
"optimizer": [torch.optim.AdamW], #
274+
"lr": loguniform(5e-4, 1e-3, 5e-3, 1e-2),
276275
}
277-
# match search_type:
278-
# case "random":
279-
# param_space |= {
280-
# "lr": loguniform(1e-4, 1e-2),
281-
# }
282-
# case "bayes":
283-
# param_space |= {
284-
# "lr": Real(1e-4, 1e-2, prior="log-uniform"),
285-
# }
286-
# case _:
287-
# raise ValueError(f"Invalid search type: {search_type}")
276+
# # match search_type:
277+
# case "random":
278+
# param_space |= {
279+
# "lr": loguniform(1e-4, 1e-2),
280+
# }
281+
# case "bayes":
282+
# param_space |= {
283+
# "lr": Real(1e-4, 1e-2, prior="log-uniform"),
284+
# }
285+
# case _:
286+
# raise ValueError(f"Invalid search type: {search_type}")
288287

289288
return param_space
290289

autoemulate/emulators/gaussian_process_sklearn.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,6 @@
1010
from sklearn.utils.validation import check_is_fitted
1111
from sklearn.utils.validation import check_X_y
1212
from skopt.space import Categorical
13-
from skopt.space import Integer
1413
from skopt.space import Real
1514

1615
from autoemulate.utils import _suppress_convergence_warnings

autoemulate/emulators/gaussian_process_torch.py

Lines changed: 2 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -1,30 +1,13 @@
1-
from copy import deepcopy
2-
31
import gpytorch
42
import numpy as np
53
import torch
6-
from scipy.stats import loguniform
7-
from scipy.stats import randint
84
from sklearn.base import BaseEstimator
95
from sklearn.base import RegressorMixin
10-
from sklearn.exceptions import DataConversionWarning
11-
from sklearn.metrics import r2_score
12-
from sklearn.model_selection import train_test_split
136
from sklearn.preprocessing._data import _handle_zeros_in_scale
147
from sklearn.utils import check_array
158
from sklearn.utils import check_X_y
169
from sklearn.utils.validation import check_is_fitted
17-
from skopt.space import Categorical
18-
from skopt.space import Integer
19-
from skopt.space import Real
20-
from skorch.callbacks import Checkpoint
21-
from skorch.callbacks import EarlyStopping
22-
from skorch.callbacks import EpochScoring
2310
from skorch.callbacks import LRScheduler
24-
from skorch.callbacks import ProgressBar
25-
from skorch.dataset import Dataset
26-
from skorch.dataset import ValidSplit
27-
from skorch.helper import predefined_split
2811
from skorch.probabilistic import ExactGPRegressor
2912

3013
from autoemulate.emulators.gaussian_process_utils import EarlyStoppingCustom
@@ -59,7 +42,7 @@ def __init__(
5942
max_epochs=50,
6043
normalize_y=True,
6144
# misc
62-
device=None,
45+
device="cpu",
6346
random_state=None,
6447
):
6548
self.mean_module = mean_module
@@ -167,11 +150,7 @@ def fit(self, X, y):
167150
),
168151
],
169152
verbose=0,
170-
device=self.device
171-
if self.device is not None
172-
else "cuda"
173-
if torch.cuda.is_available()
174-
else "cpu",
153+
device=self.device,
175154
)
176155
self.model_.fit(X, y)
177156
self.is_fitted_ = True

autoemulate/emulators/gradient_boosting.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -109,7 +109,7 @@ def get_grid_params(self, search_type="random"):
109109
"min_samples_leaf": randint(1, 6),
110110
"subsample": uniform(0.6, 0.4), # 0.4 is the range width (1.0 - 0.6)
111111
"max_features": ["sqrt", "log2", None],
112-
"ccp_alpha": loguniform(0.01, 0.1),
112+
"ccp_alpha": loguniform(0.001, 0.1),
113113
}
114114

115115
param_space_bayes = {

autoemulate/emulators/light_gbm.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -108,7 +108,7 @@ def predict(self, X):
108108
def get_grid_params(self, search_type="random"):
109109
"""Returns the grid parameters of the emulator."""
110110
param_space_random = {
111-
"boosting_type": ["gbdt", "dart"],
111+
"boosting_type": ["gbdt"],
112112
"num_leaves": randint(10, 100),
113113
"max_depth": randint(-1, 12),
114114
"learning_rate": loguniform(0.001, 0.1),
@@ -119,7 +119,7 @@ def get_grid_params(self, search_type="random"):
119119
}
120120

121121
param_space_bayes = {
122-
"boosting_type": Categorical(["gbdt", "dart"]),
122+
"boosting_type": Categorical(["gbdt"]),
123123
"num_leaves": Integer(10, 100),
124124
"max_depth": Integer(-1, 12),
125125
"learning_rate": Real(0.001, 0.1, prior="log-uniform"),

autoemulate/emulators/neural_networks/cnp_module.py

Lines changed: 20 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -13,11 +13,17 @@ class Encoder(nn.Module):
1313
"""
1414

1515
def __init__(
16-
self, input_dim, output_dim, hidden_dim, latent_dim, hidden_layers, activation
16+
self,
17+
input_dim,
18+
output_dim,
19+
hidden_dim,
20+
latent_dim,
21+
hidden_layers_enc,
22+
activation,
1723
):
1824
super().__init__()
1925
layers = [nn.Linear(input_dim + output_dim, hidden_dim), activation()]
20-
for _ in range(hidden_layers):
26+
for _ in range(hidden_layers_enc):
2127
layers.extend([nn.Linear(hidden_dim, hidden_dim), activation()])
2228
layers.append(nn.Linear(hidden_dim, latent_dim))
2329
self.net = nn.Sequential(*layers)
@@ -53,11 +59,17 @@ def forward(self, x_context, y_context, context_mask=None):
5359

5460
class Decoder(nn.Module):
5561
def __init__(
56-
self, input_dim, latent_dim, hidden_dim, output_dim, hidden_layers, activation
62+
self,
63+
input_dim,
64+
latent_dim,
65+
hidden_dim,
66+
output_dim,
67+
hidden_layers_dec,
68+
activation,
5769
):
5870
super().__init__()
5971
layers = [nn.Linear(latent_dim + input_dim, hidden_dim), activation()]
60-
for _ in range(hidden_layers):
72+
for _ in range(hidden_layers_dec):
6173
layers.extend([nn.Linear(hidden_dim, hidden_dim), activation()])
6274
self.net = nn.Sequential(*layers)
6375
self.mean_head = nn.Linear(hidden_dim, output_dim)
@@ -94,15 +106,16 @@ def __init__(
94106
output_dim,
95107
hidden_dim,
96108
latent_dim,
97-
hidden_layers,
109+
hidden_layers_enc,
110+
hidden_layers_dec,
98111
activation=nn.ReLU,
99112
):
100113
super().__init__()
101114
self.encoder = Encoder(
102-
input_dim, output_dim, hidden_dim, latent_dim, hidden_layers, activation
115+
input_dim, output_dim, hidden_dim, latent_dim, hidden_layers_enc, activation
103116
)
104117
self.decoder = Decoder(
105-
input_dim, latent_dim, hidden_dim, output_dim, hidden_layers, activation
118+
input_dim, latent_dim, hidden_dim, output_dim, hidden_layers_dec, activation
106119
)
107120

108121
def forward(self, X_context, y_context, X_target=None, context_mask=None):

autoemulate/emulators/polynomials.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,6 @@
88
from sklearn.utils.validation import check_is_fitted
99
from sklearn.utils.validation import check_X_y
1010
from skopt.space import Categorical
11-
from skopt.space import Integer
1211

1312

1413
class SecondOrderPolynomial(BaseEstimator, RegressorMixin):

autoemulate/emulators/radial_basis_functions.py

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -85,12 +85,7 @@ def predict(self, X):
8585

8686
def get_grid_params(self, search_type="random"):
8787
"""Returns the grid parameters of the emulator."""
88-
# param_space_random = {
89-
# #"smoothing": uniform(0.0, 1.0),
90-
# "kernel": ["linear", "thin_plate_spline", "cubic", "quintic", "multiquadric", "inverse_multiquadric", "gaussian"],
91-
# #"epsilon": uniform(0.0, 1.0),
92-
# "degree": randint(0, 5),
93-
# }
88+
9489
param_space_random = [
9590
{
9691
"kernel": ["linear", "multiquadric"],

autoemulate/emulators/random_forest.py

Lines changed: 4 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@
77
from sklearn.utils.validation import check_X_y
88
from skopt.space import Categorical
99
from skopt.space import Integer
10-
from skopt.space import Real
1110

1211

1312
class RandomForest(BaseEstimator, RegressorMixin):
@@ -100,18 +99,18 @@ def get_grid_params(self, search_type="random"):
10099
"n_estimators": randint(50, 500),
101100
"min_samples_split": randint(2, 20),
102101
"min_samples_leaf": randint(1, 10),
103-
"max_features": [None, "sqrt", "log2"],
102+
"max_features": ["sqrt", "log2", None, 1.0],
104103
"bootstrap": [True, False],
105104
"oob_score": [True, False],
106-
# # "max_depth": [None] + list(range(3, 20)), # None plus a range of depths
107-
"max_samples": [None, 0.5, 0.75],
105+
"max_depth": [None] + list(range(5, 30, 5)), # None plus a range of depths
106+
"max_samples": [None, 0.5, 0.7, 0.9],
108107
}
109108

110109
param_space_bayes = {
111110
"n_estimators": Integer(50, 500),
112111
"min_samples_split": Integer(2, 20),
113112
"min_samples_leaf": Integer(1, 10),
114-
"max_features": Categorical([None, "sqrt", "log2"]),
113+
"max_features": ["sqrt", "log2", 1.0, None],
115114
"bootstrap": Categorical([True, False]),
116115
"oob_score": Categorical([True, False]),
117116
# "max_depth": Categorical([None] + list(range(3, 20))), # None plus a range of depths
@@ -131,27 +130,3 @@ def model_name(self):
131130

132131
def _more_tags(self):
133132
return {"multioutput": True}
134-
135-
# def score(self, X, y, metric):
136-
# """Returns the score of the emulator.
137-
138-
# Parameters
139-
# ----------
140-
# X : array-like, shape (n_samples, n_features)
141-
# Simulation input.
142-
# y : array-like, shape (n_samples, n_outputs)
143-
# Simulation output.
144-
# metric : str
145-
# Name of the metric to use, currently either rsme or r2.
146-
# Returns
147-
# -------
148-
# metric : float
149-
# Metric of the emulator.
150-
151-
# """
152-
# predictions = self.predict(X)
153-
# return metric(y, predictions)
154-
155-
# def _more_tags(self):
156-
# return {'non_deterministic': True,
157-
# 'multioutput': True}

autoemulate/emulators/support_vector_machines.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -134,8 +134,6 @@ def get_grid_params(self, search_type="random"):
134134
"C": uniform(1.0, 3.0),
135135
"epsilon": uniform(0.1, 0.3),
136136
"shrinking": [True, False],
137-
"cache_size": randint(200, 401),
138-
"verbose": [False],
139137
"max_iter": [-1],
140138
}
141139

0 commit comments

Comments (0)