Skip to content

Commit 3706802

Browse files

Merge pull request #257 from alan-turing-institute/optimise ("Optimise"),
authored via GitHub.
2 parents: 983a012 + b68c497 — merge commit 3706802

17 files changed: +66 additions, -128 deletions (lines changed)

autoemulate/compare.py

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,9 @@
1-
import matplotlib.pyplot as plt
21
import numpy as np
32
import pandas as pd
43
from sklearn.base import BaseEstimator
54
from sklearn.decomposition import PCA
6-
from sklearn.metrics import make_scorer
7-
from sklearn.model_selection import cross_validate
85
from sklearn.model_selection import KFold
9-
from sklearn.model_selection import PredefinedSplit
10-
from sklearn.model_selection import train_test_split
11-
from sklearn.pipeline import Pipeline
126
from sklearn.preprocessing import StandardScaler
13-
from sklearn.utils.validation import check_is_fitted
147
from sklearn.utils.validation import check_X_y
158
from tqdm.autonotebook import tqdm
169

@@ -27,7 +20,6 @@
2720
from autoemulate.plotting import _plot_model
2821
from autoemulate.printing import _print_setup
2922
from autoemulate.save import ModelSerialiser
30-
from autoemulate.utils import _ensure_2d
3123
from autoemulate.utils import _get_full_model_name
3224
from autoemulate.utils import _redirect_warnings
3325
from autoemulate.utils import get_model_name

autoemulate/cross_validate.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,6 @@
55
import pandas as pd
66
from sklearn.metrics import make_scorer
77
from sklearn.model_selection import cross_validate
8-
from sklearn.model_selection import PredefinedSplit
9-
from sklearn.model_selection import train_test_split
108

119
from autoemulate.utils import get_model_name
1210
from autoemulate.utils import get_model_params

autoemulate/emulators/conditional_neural_process.py

Lines changed: 30 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,6 @@
99
from sklearn.utils.validation import check_array
1010
from sklearn.utils.validation import check_is_fitted
1111
from sklearn.utils.validation import check_X_y
12-
from skopt.space import Real
1312
from skorch import NeuralNetRegressor
1413
from skorch.callbacks import EarlyStopping
1514
from skorch.callbacks import GradientNormClipping
@@ -42,8 +41,10 @@ class ConditionalNeuralProcess(RegressorMixin, BaseEstimator):
4241
The number of hidden units in the neural network layers.
4342
latent_dim : int, default=64
4443
The dimensionality of the latent space.
45-
hidden_layers : int, default=3
46-
The number of hidden layers in the neural network.
44+
hidden_layers_enc : int, default=3
45+
The number of hidden layers in the encoder.
46+
hidden_layers_dec : int, default=3
47+
The number of hidden layers in the decoder.
4748
min_context_points : int, default=3
4849
The minimum number of context points to use during training.
4950
max_context_points : int, default=10
@@ -108,26 +109,28 @@ def __init__(
108109
# architecture
109110
hidden_dim=64,
110111
latent_dim=64,
111-
hidden_layers=3,
112+
hidden_layers_enc=3,
113+
hidden_layers_dec=3,
112114
# data per episode
113115
min_context_points=3,
114116
max_context_points=10,
115117
n_episode=32,
116118
# training
117119
max_epochs=100,
118-
lr=1e-2,
120+
lr=5e-3,
119121
batch_size=16,
120122
activation=nn.ReLU,
121123
optimizer=torch.optim.AdamW,
122124
normalize_y=True,
123125
# misc
124-
device=None,
126+
device="cpu",
125127
random_state=None,
126128
attention=False,
127129
):
128130
self.hidden_dim = hidden_dim
129131
self.latent_dim = latent_dim
130-
self.hidden_layers = hidden_layers
132+
self.hidden_layers_enc = hidden_layers_enc
133+
self.hidden_layers_dec = hidden_layers_dec
131134
self.min_context_points = min_context_points
132135
self.max_context_points = max_context_points
133136
self.n_episode = n_episode
@@ -184,7 +187,8 @@ def fit(self, X, y):
184187
module__output_dim=self.output_dim_,
185188
module__hidden_dim=self.hidden_dim,
186189
module__latent_dim=self.latent_dim,
187-
module__hidden_layers=self.hidden_layers,
190+
module__hidden_layers_enc=self.hidden_layers_enc,
191+
module__hidden_layers_dec=self.hidden_layers_dec,
188192
module__activation=self.activation,
189193
dataset__min_context_points=self.min_context_points,
190194
dataset__max_context_points=self.max_context_points,
@@ -193,11 +197,7 @@ def fit(self, X, y):
193197
lr=self.lr,
194198
batch_size=self.batch_size,
195199
optimizer=self.optimizer,
196-
device=self.device
197-
if self.device is not None
198-
else "cuda"
199-
if torch.cuda.is_available()
200-
else "cpu",
200+
device=self.device,
201201
dataset=CNPDataset, # special dataset to sample context and target sets
202202
criterion=CNPLoss,
203203
iterator_train__collate_fn=cnp_collate_fn, # special collate to different n in episodes
@@ -260,31 +260,30 @@ def predict(self, X, return_std=False):
260260
def get_grid_params(search_type: str = "random"):
261261
param_space = {
262262
"max_epochs": [100, 200, 300],
263-
"batch_size": [16, 32, 64],
263+
"batch_size": [16, 32],
264264
"hidden_dim": [32, 64, 128],
265265
"latent_dim": [32, 64, 128],
266-
"max_context_points": [10, 20, 30],
267-
"hidden_layers": [1, 2, 3, 4, 5],
266+
"max_context_points": [5, 10, 15],
267+
"hidden_layers_enc": [2, 3, 4],
268+
"hidden_layers_dec": [2, 3, 4],
268269
"activation": [
269270
nn.ReLU,
270-
# nn.Tanh,
271271
nn.GELU,
272-
# nn.Sigmoid,
273272
],
274-
# ],
275-
"optimizer": [torch.optim.AdamW, torch.optim.SGD], #
273+
"optimizer": [torch.optim.AdamW], #
274+
"lr": loguniform(5e-4, 1e-3, 5e-3, 1e-2),
276275
}
277-
# match search_type:
278-
# case "random":
279-
# param_space |= {
280-
# "lr": loguniform(1e-4, 1e-2),
281-
# }
282-
# case "bayes":
283-
# param_space |= {
284-
# "lr": Real(1e-4, 1e-2, prior="log-uniform"),
285-
# }
286-
# case _:
287-
# raise ValueError(f"Invalid search type: {search_type}")
276+
# # match search_type:
277+
# case "random":
278+
# param_space |= {
279+
# "lr": loguniform(1e-4, 1e-2),
280+
# }
281+
# case "bayes":
282+
# param_space |= {
283+
# "lr": Real(1e-4, 1e-2, prior="log-uniform"),
284+
# }
285+
# case _:
286+
# raise ValueError(f"Invalid search type: {search_type}")
288287

289288
return param_space
290289

autoemulate/emulators/gaussian_process_sklearn.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,6 @@
1010
from sklearn.utils.validation import check_is_fitted
1111
from sklearn.utils.validation import check_X_y
1212
from skopt.space import Categorical
13-
from skopt.space import Integer
1413
from skopt.space import Real
1514

1615
from autoemulate.utils import _suppress_convergence_warnings

autoemulate/emulators/gaussian_process_torch.py

Lines changed: 2 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -1,30 +1,13 @@
1-
from copy import deepcopy
2-
31
import gpytorch
42
import numpy as np
53
import torch
6-
from scipy.stats import loguniform
7-
from scipy.stats import randint
84
from sklearn.base import BaseEstimator
95
from sklearn.base import RegressorMixin
10-
from sklearn.exceptions import DataConversionWarning
11-
from sklearn.metrics import r2_score
12-
from sklearn.model_selection import train_test_split
136
from sklearn.preprocessing._data import _handle_zeros_in_scale
147
from sklearn.utils import check_array
158
from sklearn.utils import check_X_y
169
from sklearn.utils.validation import check_is_fitted
17-
from skopt.space import Categorical
18-
from skopt.space import Integer
19-
from skopt.space import Real
20-
from skorch.callbacks import Checkpoint
21-
from skorch.callbacks import EarlyStopping
22-
from skorch.callbacks import EpochScoring
2310
from skorch.callbacks import LRScheduler
24-
from skorch.callbacks import ProgressBar
25-
from skorch.dataset import Dataset
26-
from skorch.dataset import ValidSplit
27-
from skorch.helper import predefined_split
2811
from skorch.probabilistic import ExactGPRegressor
2912

3013
from autoemulate.emulators.gaussian_process_utils import EarlyStoppingCustom
@@ -59,7 +42,7 @@ def __init__(
5942
max_epochs=50,
6043
normalize_y=True,
6144
# misc
62-
device=None,
45+
device="cpu",
6346
random_state=None,
6447
):
6548
self.mean_module = mean_module
@@ -167,11 +150,7 @@ def fit(self, X, y):
167150
),
168151
],
169152
verbose=0,
170-
device=self.device
171-
if self.device is not None
172-
else "cuda"
173-
if torch.cuda.is_available()
174-
else "cpu",
153+
device=self.device,
175154
)
176155
self.model_.fit(X, y)
177156
self.is_fitted_ = True

autoemulate/emulators/gradient_boosting.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -109,7 +109,7 @@ def get_grid_params(self, search_type="random"):
109109
"min_samples_leaf": randint(1, 6),
110110
"subsample": uniform(0.6, 0.4), # 0.4 is the range width (1.0 - 0.6)
111111
"max_features": ["sqrt", "log2", None],
112-
"ccp_alpha": loguniform(0.01, 0.1),
112+
"ccp_alpha": loguniform(0.001, 0.1),
113113
}
114114

115115
param_space_bayes = {

autoemulate/emulators/light_gbm.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -108,7 +108,7 @@ def predict(self, X):
108108
def get_grid_params(self, search_type="random"):
109109
"""Returns the grid parameters of the emulator."""
110110
param_space_random = {
111-
"boosting_type": ["gbdt", "dart"],
111+
"boosting_type": ["gbdt"],
112112
"num_leaves": randint(10, 100),
113113
"max_depth": randint(-1, 12),
114114
"learning_rate": loguniform(0.001, 0.1),
@@ -119,7 +119,7 @@ def get_grid_params(self, search_type="random"):
119119
}
120120

121121
param_space_bayes = {
122-
"boosting_type": Categorical(["gbdt", "dart"]),
122+
"boosting_type": Categorical(["gbdt"]),
123123
"num_leaves": Integer(10, 100),
124124
"max_depth": Integer(-1, 12),
125125
"learning_rate": Real(0.001, 0.1, prior="log-uniform"),

autoemulate/emulators/neural_networks/cnp_module.py

Lines changed: 20 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -13,11 +13,17 @@ class Encoder(nn.Module):
1313
"""
1414

1515
def __init__(
16-
self, input_dim, output_dim, hidden_dim, latent_dim, hidden_layers, activation
16+
self,
17+
input_dim,
18+
output_dim,
19+
hidden_dim,
20+
latent_dim,
21+
hidden_layers_enc,
22+
activation,
1723
):
1824
super().__init__()
1925
layers = [nn.Linear(input_dim + output_dim, hidden_dim), activation()]
20-
for _ in range(hidden_layers):
26+
for _ in range(hidden_layers_enc):
2127
layers.extend([nn.Linear(hidden_dim, hidden_dim), activation()])
2228
layers.append(nn.Linear(hidden_dim, latent_dim))
2329
self.net = nn.Sequential(*layers)
@@ -53,11 +59,17 @@ def forward(self, x_context, y_context, context_mask=None):
5359

5460
class Decoder(nn.Module):
5561
def __init__(
56-
self, input_dim, latent_dim, hidden_dim, output_dim, hidden_layers, activation
62+
self,
63+
input_dim,
64+
latent_dim,
65+
hidden_dim,
66+
output_dim,
67+
hidden_layers_dec,
68+
activation,
5769
):
5870
super().__init__()
5971
layers = [nn.Linear(latent_dim + input_dim, hidden_dim), activation()]
60-
for _ in range(hidden_layers):
72+
for _ in range(hidden_layers_dec):
6173
layers.extend([nn.Linear(hidden_dim, hidden_dim), activation()])
6274
self.net = nn.Sequential(*layers)
6375
self.mean_head = nn.Linear(hidden_dim, output_dim)
@@ -94,15 +106,16 @@ def __init__(
94106
output_dim,
95107
hidden_dim,
96108
latent_dim,
97-
hidden_layers,
109+
hidden_layers_enc,
110+
hidden_layers_dec,
98111
activation=nn.ReLU,
99112
):
100113
super().__init__()
101114
self.encoder = Encoder(
102-
input_dim, output_dim, hidden_dim, latent_dim, hidden_layers, activation
115+
input_dim, output_dim, hidden_dim, latent_dim, hidden_layers_enc, activation
103116
)
104117
self.decoder = Decoder(
105-
input_dim, latent_dim, hidden_dim, output_dim, hidden_layers, activation
118+
input_dim, latent_dim, hidden_dim, output_dim, hidden_layers_dec, activation
106119
)
107120

108121
def forward(self, X_context, y_context, X_target=None, context_mask=None):

autoemulate/emulators/polynomials.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,6 @@
88
from sklearn.utils.validation import check_is_fitted
99
from sklearn.utils.validation import check_X_y
1010
from skopt.space import Categorical
11-
from skopt.space import Integer
1211

1312

1413
class SecondOrderPolynomial(BaseEstimator, RegressorMixin):

autoemulate/emulators/radial_basis_functions.py

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -85,12 +85,7 @@ def predict(self, X):
8585

8686
def get_grid_params(self, search_type="random"):
8787
"""Returns the grid parameters of the emulator."""
88-
# param_space_random = {
89-
# #"smoothing": uniform(0.0, 1.0),
90-
# "kernel": ["linear", "thin_plate_spline", "cubic", "quintic", "multiquadric", "inverse_multiquadric", "gaussian"],
91-
# #"epsilon": uniform(0.0, 1.0),
92-
# "degree": randint(0, 5),
93-
# }
88+
9489
param_space_random = [
9590
{
9691
"kernel": ["linear", "multiquadric"],

autoemulate/emulators/random_forest.py

Lines changed: 4 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@
77
from sklearn.utils.validation import check_X_y
88
from skopt.space import Categorical
99
from skopt.space import Integer
10-
from skopt.space import Real
1110

1211

1312
class RandomForest(BaseEstimator, RegressorMixin):
@@ -100,18 +99,18 @@ def get_grid_params(self, search_type="random"):
10099
"n_estimators": randint(50, 500),
101100
"min_samples_split": randint(2, 20),
102101
"min_samples_leaf": randint(1, 10),
103-
"max_features": [None, "sqrt", "log2"],
102+
"max_features": ["sqrt", "log2", None, 1.0],
104103
"bootstrap": [True, False],
105104
"oob_score": [True, False],
106-
# # "max_depth": [None] + list(range(3, 20)), # None plus a range of depths
107-
"max_samples": [None, 0.5, 0.75],
105+
"max_depth": [None] + list(range(5, 30, 5)), # None plus a range of depths
106+
"max_samples": [None, 0.5, 0.7, 0.9],
108107
}
109108

110109
param_space_bayes = {
111110
"n_estimators": Integer(50, 500),
112111
"min_samples_split": Integer(2, 20),
113112
"min_samples_leaf": Integer(1, 10),
114-
"max_features": Categorical([None, "sqrt", "log2"]),
113+
"max_features": ["sqrt", "log2", 1.0, None],
115114
"bootstrap": Categorical([True, False]),
116115
"oob_score": Categorical([True, False]),
117116
# "max_depth": Categorical([None] + list(range(3, 20))), # None plus a range of depths
@@ -131,27 +130,3 @@ def model_name(self):
131130

132131
def _more_tags(self):
133132
return {"multioutput": True}
134-
135-
# def score(self, X, y, metric):
136-
# """Returns the score of the emulator.
137-
138-
# Parameters
139-
# ----------
140-
# X : array-like, shape (n_samples, n_features)
141-
# Simulation input.
142-
# y : array-like, shape (n_samples, n_outputs)
143-
# Simulation output.
144-
# metric : str
145-
# Name of the metric to use, currently either rsme or r2.
146-
# Returns
147-
# -------
148-
# metric : float
149-
# Metric of the emulator.
150-
151-
# """
152-
# predictions = self.predict(X)
153-
# return metric(y, predictions)
154-
155-
# def _more_tags(self):
156-
# return {'non_deterministic': True,
157-
# 'multioutput': True}

autoemulate/emulators/support_vector_machines.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -134,8 +134,6 @@ def get_grid_params(self, search_type="random"):
134134
"C": uniform(1.0, 3.0),
135135
"epsilon": uniform(0.1, 0.3),
136136
"shrinking": [True, False],
137-
"cache_size": randint(200, 401),
138-
"verbose": [False],
139137
"max_iter": [-1],
140138
}
141139

0 commit comments

Comments (0)