
Commit a8828d5

Merge pull request #274 from alan-turing-institute/release/v0.2.0
Release/v0.2.0
2 parents: 9775f64 + c80fbec

File tree

8 files changed (+213, -148 lines)


CHANGELOG.md

Lines changed: 14 additions & 0 deletions
@@ -0,0 +1,14 @@
+# Changelog
+
+## [0.2.0] - 2024-11-25
+
+- Added Conditional and Attentive Neural Processes
+- Added Gaussian Processes and Multi-task Gaussian Processes with GPyTorch
+- Changed parts of the UI:
+  - print_results() is now summarise_cv()
+  - plot_results() is now plot_cv()
+  - evaluate_model() is now evaluate()
+  - plot_model() is now plot_eval()
+- Added Global Sensitivity Analysis
+- New visualisation: Xy plot with confidence bands
+- Bayesian Hyperparameter Optimization has been removed
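
For readers upgrading, here is a minimal sketch of the renamed calls in a typical workflow. The import path is inferred from the file layout in this commit, and `X`, `y`, and the `model` placeholder are illustrative, not part of the diff:

```python
# Hedged sketch of the renamed UI (new names from the changelog above)
from autoemulate.compare import AutoEmulate

em = AutoEmulate()
em.setup(X, y)       # X, y: your simulation inputs/outputs (assumed)
em.compare()         # cross-validate all models

em.summarise_cv()    # was print_results()
em.plot_cv()         # was plot_results()

model = ...          # a fitted model from the comparison (accessor not shown in this diff)
em.evaluate(model)   # was evaluate_model(); model=None is also accepted per compare.py below
em.plot_eval(model)  # was plot_model()
```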

README.md

Lines changed: 3 additions & 2 deletions
@@ -13,13 +13,14 @@ The project is in early development.
 
 ## Installation
 
-There's currently a lot of development, so we recommend installing the most current version from GitHub:
+There's lots of development at the moment, so we recommend installing the most current version from GitHub:
 
 ```bash
 pip install git+https://github.com/alan-turing-institute/autoemulate.git
 ```
 
-There's also a release available on PyPI (note: currently an older version and out of date with the documentation)
+There's also a release on PyPI:
+
 ```bash
 pip install autoemulate
 ```

autoemulate/compare.py

Lines changed: 6 additions & 8 deletions
@@ -479,9 +479,7 @@ def evaluate(self, model=None, multioutput="uniform_average"):
 
         scores_df = (
             pd.DataFrame(scores)
-            .assign(
-                target=[f"target_{i}" for i in range(len(scores[next(iter(scores))]))]
-            )
+            .assign(target=[f"y{i}" for i in range(len(scores[next(iter(scores))]))])
             .assign(short=get_short_model_name(model))
             .assign(model=get_model_name(model))
             .reindex(columns=["model", "short", "target"] + list(scores.keys()))
@@ -508,17 +506,17 @@ def plot_eval(
         ----------
         model : object
             Fitted model.
-        plot_type : str, optional
+        style : str, optional
            The type of plot to draw:
            "Xy" observed and predicted values vs. features, including 2σ error bands where available (default).
            "actual_vs_predicted" draws the observed values (y-axis) vs. the predicted values (x-axis) (default).
            "residual_vs_predicted" draws the residuals, i.e. difference between observed and predicted values, (y-axis) vs. the predicted values (x-axis).
         n_cols : int, optional
             Number of columns in the plot grid for multi-output. Default is 2.
-        output_index : int
-            Index of the output to plot. Default is 0..
-        input_index : int
-            Index of the input to plot. Default is 0. Only used if plot_type="Xy".
+        output_index : list, int
+            Index of the output to plot. Either a single index or a list of indices.
+        input_index : list, int
+            Index of the input to plot. Either a single index or a list of indices. Only used if style="Xy".
         """
         fig = _plot_model(
             model,
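
Since `output_index` and `input_index` now accept lists, several panels can be requested in one call. A minimal sketch, assuming `em` and `model` come from an earlier setup/compare run (not shown in this diff):

```python
# Xy plot for the first two inputs against output 0 ("style" replaces "plot_type")
em.plot_eval(model, style="Xy", input_index=[0, 1], output_index=0)

# actual-vs-predicted panels for two outputs, in the default 2-column grid
em.plot_eval(model, style="actual_vs_predicted", output_index=[0, 1], n_cols=2)
```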

autoemulate/emulators/gaussian_process.py

Lines changed: 90 additions & 79 deletions
@@ -221,85 +221,6 @@ def predict(self, X, return_std=False):
     def get_grid_params(self, search_type="random"):
         """Returns the grid parameters for the emulator."""
 
-        def rbf(n_features, n_outputs):
-            return gpytorch.kernels.RBFKernel(
-                ard_num_dims=n_features,
-                batch_shape=n_outputs,
-            ).initialize(lengthscale=torch.ones(n_features) * 1.5)
-
-        def matern_5_2_kernel(n_features, n_outputs):
-            return gpytorch.kernels.MaternKernel(
-                nu=2.5,
-                ard_num_dims=n_features,
-                batch_shape=n_outputs,
-            )
-
-        def matern_3_2_kernel(n_features, n_outputs):
-            return gpytorch.kernels.MaternKernel(
-                nu=1.5,
-                ard_num_dims=n_features,
-                batch_shape=n_outputs,
-            )
-
-        def rq_kernel(n_features, n_outputs):
-            return gpytorch.kernels.RQKernel(
-                ard_num_dims=n_features,
-                batch_shape=n_outputs,
-            )
-
-        def rbf_plus_constant(n_features, n_outputs):
-            return (
-                gpytorch.kernels.RBFKernel(
-                    ard_num_dims=n_features,
-                    batch_shape=n_outputs,
-                ).initialize(lengthscale=torch.ones(n_features) * 1.5)
-                + gpytorch.kernels.ConstantKernel()
-            )
-
-        # combinations
-        def rbf_plus_linear(n_features, n_outputs):
-            return gpytorch.kernels.RBFKernel(
-                ard_num_dims=n_features,
-                batch_shape=n_outputs,
-            ) + gpytorch.kernels.LinearKernel(
-                ard_num_dims=n_features,
-                batch_shape=n_outputs,
-            )
-
-        def matern_5_2_plus_rq(n_features, n_outputs):
-            return gpytorch.kernels.MaternKernel(
-                nu=2.5,
-                ard_num_dims=n_features,
-                batch_shape=n_outputs,
-            ) + gpytorch.kernels.RQKernel(
-                ard_num_dims=n_features,
-                batch_shape=n_outputs,
-            )
-
-        def rbf_times_linear(n_features, n_outputs):
-            return gpytorch.kernels.RBFKernel(
-                ard_num_dims=n_features,
-                batch_shape=n_outputs,
-            ) * gpytorch.kernels.LinearKernel(
-                ard_num_dims=n_features,
-                batch_shape=n_outputs,
-            )
-
-        # means
-        def constant_mean(n_features, n_outputs):
-            return gpytorch.means.ConstantMean(batch_shape=n_outputs)
-
-        def zero_mean(n_features, n_outputs):
-            return gpytorch.means.ZeroMean(batch_shape=n_outputs)
-
-        def linear_mean(n_features, n_outputs):
-            return gpytorch.means.LinearMean(
-                input_size=n_features, batch_shape=n_outputs
-            )
-
-        def poly_mean(n_features, n_outputs):
-            return PolyMean(degree=2, input_size=n_features, batch_shape=n_outputs)
-
         if search_type == "random":
             param_space = {
                 "covar_module": [
@@ -331,3 +252,93 @@ def model_name(self):
     def _more_tags(self):
         # TODO: is it really non-deterministic?
         return {"multioutput": True, "non_deterministic": True}
+
+
+# kernel functions for parameter search have to be outside the class so that pickle can find them
+def rbf(n_features, n_outputs):
+    return gpytorch.kernels.RBFKernel(
+        ard_num_dims=n_features,
+        batch_shape=n_outputs,
+    ).initialize(lengthscale=torch.ones(n_features) * 1.5)
+
+
+def matern_5_2_kernel(n_features, n_outputs):
+    return gpytorch.kernels.MaternKernel(
+        nu=2.5,
+        ard_num_dims=n_features,
+        batch_shape=n_outputs,
+    )
+
+
+def matern_3_2_kernel(n_features, n_outputs):
+    return gpytorch.kernels.MaternKernel(
+        nu=1.5,
+        ard_num_dims=n_features,
+        batch_shape=n_outputs,
+    )
+
+
+def rq_kernel(n_features, n_outputs):
+    return gpytorch.kernels.RQKernel(
+        ard_num_dims=n_features,
+        batch_shape=n_outputs,
+    )
+
+
+def rbf_plus_constant(n_features, n_outputs):
+    return (
+        gpytorch.kernels.RBFKernel(
+            ard_num_dims=n_features,
+            batch_shape=n_outputs,
+        ).initialize(lengthscale=torch.ones(n_features) * 1.5)
+        + gpytorch.kernels.ConstantKernel()
+    )
+
+
+# combinations
+def rbf_plus_linear(n_features, n_outputs):
+    return gpytorch.kernels.RBFKernel(
+        ard_num_dims=n_features,
+        batch_shape=n_outputs,
+    ) + gpytorch.kernels.LinearKernel(
+        ard_num_dims=n_features,
+        batch_shape=n_outputs,
+    )
+
+
+def matern_5_2_plus_rq(n_features, n_outputs):
+    return gpytorch.kernels.MaternKernel(
+        nu=2.5,
+        ard_num_dims=n_features,
+        batch_shape=n_outputs,
+    ) + gpytorch.kernels.RQKernel(
+        ard_num_dims=n_features,
+        batch_shape=n_outputs,
+    )
+
+
+def rbf_times_linear(n_features, n_outputs):
+    return gpytorch.kernels.RBFKernel(
+        ard_num_dims=n_features,
+        batch_shape=n_outputs,
+    ) * gpytorch.kernels.LinearKernel(
+        ard_num_dims=n_features,
+        batch_shape=n_outputs,
+    )
+
+
+# means
+def constant_mean(n_features, n_outputs):
+    return gpytorch.means.ConstantMean(batch_shape=n_outputs)
+
+
+def zero_mean(n_features, n_outputs):
+    return gpytorch.means.ZeroMean(batch_shape=n_outputs)
+
+
+def linear_mean(n_features, n_outputs):
+    return gpytorch.means.LinearMean(input_size=n_features, batch_shape=n_outputs)
+
+
+def poly_mean(n_features, n_outputs):
+    return PolyMean(degree=2, input_size=n_features, batch_shape=n_outputs)
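
The comment in the added hunk gives the rationale for this move: pickle serialises functions by reference to their module, so functions defined inside a method cannot be pickled, which breaks parallelised hyperparameter search. A standalone sketch of the failure mode in plain Python, independent of autoemulate:

```python
import pickle

def module_level(n):
    # picklable: pickle stores a reference to "<module>.module_level"
    return n

def factory():
    def nested(n):
        # not picklable: exists only inside factory's local scope
        return n
    return nested

pickle.dumps(module_level)  # works
try:
    pickle.dumps(factory())
except (pickle.PicklingError, AttributeError) as err:
    print(err)  # Can't pickle local object 'factory.<locals>.nested'
```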

autoemulate/emulators/gaussian_process_mt.py

Lines changed: 56 additions & 45 deletions
@@ -202,51 +202,6 @@ def predict(self, X, return_std=False):
     def get_grid_params(self, search_type="random"):
         """Returns the grid parameters for the emulator."""
 
-        # wrapper functions for kernel initialization at fit time (to provide ard_num_dims)
-        # kernels
-        def rbf_kernel(n_features):
-            return gpytorch.kernels.RBFKernel(ard_num_dims=n_features).initialize(
-                lengthscale=torch.ones(n_features) * 1.5
-            )
-
-        def matern_5_2_kernel(n_features):
-            return gpytorch.kernels.MaternKernel(nu=2.5, ard_num_dims=n_features)
-
-        def matern_3_2_kernel(n_features):
-            return gpytorch.kernels.MaternKernel(nu=1.5, ard_num_dims=n_features)
-
-        def rq_kernel(n_features):
-            return gpytorch.kernels.RQKernel(ard_num_dims=n_features)
-
-        # combinations
-        def rbf_plus_linear(n_features):
-            return gpytorch.kernels.RBFKernel(
-                ard_num_dims=n_features
-            ) + gpytorch.kernels.LinearKernel(ard_num_dims=n_features)
-
-        def matern_5_2_plus_rq(n_features):
-            return gpytorch.kernels.MaternKernel(
-                nu=2.5, ard_num_dims=n_features
-            ) + gpytorch.kernels.RQKernel(ard_num_dims=n_features)
-
-        def rbf_times_linear(n_features):
-            return gpytorch.kernels.RBFKernel(
-                ard_num_dims=n_features
-            ) * gpytorch.kernels.LinearKernel(ard_num_dims=n_features)
-
-        # means
-        def constant_mean(n_features):
-            return gpytorch.means.ConstantMean()
-
-        def zero_mean(n_features):
-            return gpytorch.means.ZeroMean()
-
-        def linear_mean(n_features):
-            return gpytorch.means.LinearMean(input_size=n_features)
-
-        def poly_mean(n_features):
-            return PolyMean(degree=2, input_size=n_features)
-
         if search_type == "random":
             param_space = {
                 "covar_module": [
@@ -277,3 +232,59 @@ def model_name(self):
     def _more_tags(self):
         # TODO: is it really non-deterministic?
         return {"multioutput": True, "non_deterministic": True}
+
+
+# wrapper functions for kernel initialization at fit time (to provide ard_num_dims)
+# move outside class to allow pickling
+def rbf_kernel(n_features):
+    return gpytorch.kernels.RBFKernel(ard_num_dims=n_features).initialize(
+        lengthscale=torch.ones(n_features) * 1.5
+    )
+
+
+def matern_5_2_kernel(n_features):
+    return gpytorch.kernels.MaternKernel(nu=2.5, ard_num_dims=n_features)
+
+
+def matern_3_2_kernel(n_features):
+    return gpytorch.kernels.MaternKernel(nu=1.5, ard_num_dims=n_features)
+
+
+def rq_kernel(n_features):
+    return gpytorch.kernels.RQKernel(ard_num_dims=n_features)
+
+
+# combinations
+def rbf_plus_linear(n_features):
+    return gpytorch.kernels.RBFKernel(
+        ard_num_dims=n_features
+    ) + gpytorch.kernels.LinearKernel(ard_num_dims=n_features)
+
+
+def matern_5_2_plus_rq(n_features):
+    return gpytorch.kernels.MaternKernel(
+        nu=2.5, ard_num_dims=n_features
+    ) + gpytorch.kernels.RQKernel(ard_num_dims=n_features)
+
+
+def rbf_times_linear(n_features):
+    return gpytorch.kernels.RBFKernel(
+        ard_num_dims=n_features
+    ) * gpytorch.kernels.LinearKernel(ard_num_dims=n_features)
+
+
+# means
+def constant_mean(n_features):
+    return gpytorch.means.ConstantMean()
+
+
+def zero_mean(n_features):
+    return gpytorch.means.ZeroMean()
+
+
+def linear_mean(n_features):
+    return gpytorch.means.LinearMean(input_size=n_features)
+
+
+def poly_mean(n_features):
+    return PolyMean(degree=2, input_size=n_features)
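
Both hunks leave the `param_space` construction in place and only relocate the factories, which the emulator calls at fit time with the data's feature count. The dictionary body is truncated in this diff after `"covar_module": [`, so the following is only an illustrative guess at how the relocated factories plug in (the `"mean_module"` key and the specific entries are assumptions, not taken from this commit):

```python
# Illustrative sketch only: the real dictionary contents are not shown in this diff.
param_space = {
    "covar_module": [rbf_kernel, matern_5_2_kernel, rbf_plus_linear],
    "mean_module": [constant_mean, zero_mean, linear_mean, poly_mean],  # assumed key
}

# A sampled configuration is materialised once the number of features is known:
n_features = 8
covar = param_space["covar_module"][1](n_features)  # MaternKernel(nu=2.5, ard_num_dims=8)
mean = param_space["mean_module"][0](n_features)    # ConstantMean()
```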

docs/tutorials/01_start.ipynb

Lines changed: 4 additions & 4 deletions
@@ -813,9 +813,9 @@
    "source": [
     "Although we tried to chose default model parameters that work well in a wide range of scenarios, hyperparameter search will often find an emulator model with a better fit. Internally, `AutoEmulate` compares the performance of different models and hyperparameters using cross-validation on the training data, which can be computationally expensive and time-consuming for larger datasets. To speed it up, we can parallelise the process with `n_jobs`.\n",
     "\n",
-    "For each model, we've pre-defined a search space for hyperparameters. When setting up `AutoEmulate` with `param_search=True`, we default to using random search with `param_search_iters = 20` iterations. We plan to add other hyperparameter search methods in the future. \n",
+    "For each model, we've pre-defined a search space for hyperparameters. When setting up `AutoEmulate` with `param_search=True`, we default to using random search with `param_search_iters = 20` iterations. This means that 20 hyperparameter combinations from the search space are sampled and evaluated. We plan to add other hyperparameter search methods in the future. \n",
     "\n",
-    "Let's do a hyperparameter search for the Gaussian Process and Random Forest models."
+    "Let's do a hyperparameter search for the Support Vector Machines and Random Forest models."
    ]
   },
   {
@@ -1352,7 +1352,7 @@
    ],
    "source": [
     "em = AutoEmulate()\n",
-    "em.setup(X, y, param_search=True, param_search_type=\"random\", param_search_iters=20, models=[\"GaussianProcess\", \"RandomForest\"], n_jobs=-2) # n_jobs=-2 uses all cores but one\n",
+    "em.setup(X, y, param_search=True, param_search_type=\"random\", param_search_iters=10, models=[\"SupportVectorMachines\", \"RandomForest\"], n_jobs=-2) # n_jobs=-2 uses all cores but one\n",
     "em.compare()"
    ]
   },
@@ -1427,7 +1427,7 @@
    "metadata": {},
    "source": [
     "**Notes**: \n",
-    "* Some models, such as `GaussianProcess` can be slow to run hyperparameter search on larger datasets (say n > 1500). \n",
+    "* Some models, such as `GaussianProcess` can be slow when conducting hyperparameter search on larger datasets (say n > 1000). \n",
     "* Use the `models` argument to only run hyperparameter search on a subset of models to speed up the process.\n",
     "* When possible, use `n_jobs` to parallelise the hyperparameter search. With larger datasets, we recommend setting `param_search_iters` to a lower number, such as 5, to see how long it takes to run and then increase it if necessary.\n",
     "* all models can be specified with short names too, such as `rf` for `RandomForest`, `gp` for `GaussianProcess`, `svm` for `SupportVectorMachines`, etc"