|
1 | 1 | """Functions for getting and processing models."""
|
| 2 | +from sklearn.decomposition import PCA |
2 | 3 | from sklearn.multioutput import MultiOutputRegressor
|
3 | 4 | from sklearn.pipeline import Pipeline
|
4 | 5 |
|
5 |
| - |
6 |
| -def _turn_models_into_multioutput(models, y): |
7 |
| - """Turn single output models into multioutput models if y is 2D. |
8 |
| -
|
9 |
| - Parameters |
10 |
| - ---------- |
11 |
| - models : dict |
12 |
| - Dict of model instances. |
13 |
| - y : array-like, shape (n_samples, n_outputs) |
14 |
| - Simulation output. |
15 |
| -
|
16 |
| - Returns |
17 |
| - ------- |
18 |
| - models_multi : dict |
19 |
| - Dict with model instances, where single output models are now wrapped in MultiOutputRegressor. |
20 |
| - """ |
21 |
| - |
22 |
| - models_multi = [ |
23 |
| - MultiOutputRegressor(model) |
24 |
| - if not model._more_tags()["multioutput"] and (y.ndim > 1 and y.shape[1] > 1) |
25 |
| - else model |
26 |
| - for model in models |
27 |
| - ] |
28 |
| - return models_multi |
29 |
| - |
30 |
| - |
31 |
| -def _wrap_models_in_pipeline(models, scale, scaler, reduce_dim, dim_reducer): |
32 |
| - """Wrap models in a pipeline if scale is True. |
33 |
| -
|
34 |
| - Parameters |
35 |
| - ---------- |
36 |
| - models : dict |
37 |
| - dict of model instances. |
38 |
| - scale : bool |
39 |
| - Whether to scale the data. |
40 |
| - scaler : sklearn.preprocessing object |
41 |
| - Scaler to use. |
42 |
| - reduce_dim : bool |
43 |
| - Whether to reduce the dimensionality of the data. |
44 |
| - dim_reducer : sklearn.decomposition object |
45 |
| - Dimensionality reduction method to use. |
46 |
| -
|
47 |
| - Returns |
48 |
| - ------- |
49 |
| - models_scaled : dict |
50 |
| - dict of model_names: model instances, with scaled models wrapped in a pipeline. |
51 |
| - """ |
52 |
| - |
53 |
| - models_piped = [] |
54 |
| - |
55 |
| - for model in models: |
56 |
| - steps = [] |
57 |
| - if scale: |
58 |
| - steps.append(("scaler", scaler)) |
59 |
| - if reduce_dim: |
60 |
| - steps.append(("dim_reducer", dim_reducer)) |
61 |
| - steps.append(("model", model)) |
62 |
| - # without scaling or dim reduction, the model is the only step |
63 |
| - models_piped.append(Pipeline(steps)) |
64 |
| - |
65 |
| - return models_piped |
66 |
| - |
67 |
| - |
68 |
def _process_models(
    model_registry, model_names, y, scale, scaler, reduce_dim, dim_reducer
):
    """Get models from the registry and prepare them for fitting.

    The models are first made multi-output aware (see
    ``_turn_models_into_multioutput``) and then wrapped in pipelines (see
    ``_wrap_models_in_pipeline``).

    Parameters
    ----------
    model_registry : ModelRegistry
        An instance of the ModelRegistry class; its ``get_models`` method is
        called with ``model_names``.
    model_names : list
        List of model names.
    y : array-like, shape (n_samples, n_outputs)
        Simulation output.
    scale : bool
        Whether to scale the data.
    scaler : sklearn.preprocessing object
        Scaler to use.
    reduce_dim : bool
        Whether to reduce the dimensionality of the data.
    dim_reducer : sklearn.decomposition object
        Dimensionality reduction method to use.

    Returns
    -------
    models : list
        List of pipelines, one per model.
    """
    models = model_registry.get_models(model_names)
    models_multi = _turn_models_into_multioutput(models, y)
    return _wrap_models_in_pipeline(
        models_multi, scale, scaler, reduce_dim, dim_reducer
    )
| 6 | +from autoemulate.preprocess_target import get_dim_reducer |
| 7 | +from autoemulate.preprocess_target import InputOutputPipeline |
| 8 | +from autoemulate.preprocess_target import NoChangeTransformer |
| 9 | +from autoemulate.preprocess_target import TargetPCA |
| 10 | +from autoemulate.preprocess_target import TargetVAE |
| 11 | + |
| 12 | + |
class AutoEmulatePipeline:
    """Build one preprocessing + model pipeline per requested model.

    On construction the class fetches the models from the registry, wraps
    single-output models for multi-output targets, and assembles the final
    pipelines, which are then available as ``models_piped``.

    Attributes
    ----------
    models : list
        Model instances returned by ``model_registry.get_models``.
    models_multi : list
        ``models`` after ``_turn_models_into_multioutput``.
    models_piped : list
        Final pipelines, one per model, in the same order.
    dim_reducer_output : object
        Result of ``get_dim_reducer`` for ``prep_config``.
    """

    def __init__(
        self,
        model_registry,
        model_names,
        y,
        prep_config,
        scale_input=False,
        scaler_input=None,
        reduce_dim_input=False,
        dim_reducer_input=None,
        scale_output=False,
        scaler_output=None,
        reduce_dim_output=False,
    ):
        """Collect the models and build their pipelines.

        Parameters
        ----------
        model_registry : object
            Must provide ``get_models(model_names)``.
        model_names : list
            Names passed to ``model_registry.get_models``.
        y : array, shape (n_samples,) or (n_samples, n_outputs)
            Simulation output; only its ``ndim`` / ``shape`` are inspected.
        prep_config : dict
            Must contain ``"name"``; may contain ``"params"`` (a dict of
            keyword arguments). Both are forwarded to ``get_dim_reducer`` to
            create the output dimensionality reducer.
        scale_input : bool
            Whether to add ``scaler_input`` in front of each model.
        scaler_input : object
            Scaler for the inputs. Only used when ``scale_input`` is truthy.
        reduce_dim_input : bool
            Whether to add ``dim_reducer_input`` in front of each model.
        dim_reducer_input : object
            Input dimensionality reducer. Only used when
            ``reduce_dim_input`` is truthy.
        scale_output : bool
            Whether to add ``scaler_output`` to the output transformation.
        scaler_output : object
            Scaler for the outputs. Only used when ``scale_output`` is truthy.
        reduce_dim_output : bool
            Whether to add the reducer built from ``prep_config`` to the
            output transformation.
        """
        # NOTE(review): ``model_piped`` looks like a misspelling of
        # ``models_piped``. It is kept (always None) in case external code
        # reads it; the attribute that actually holds the pipelines is
        # ``models_piped``, initialised right below.
        self.model_piped = None
        self.models_piped = []

        prep_name = prep_config["name"]
        prep_params = prep_config.get("params", {})
        self.dim_reducer_output = get_dim_reducer(prep_name, **prep_params)

        self.models = model_registry.get_models(model_names)

        self._turn_models_into_multioutput(y)

        # Store pipeline settings as instance attributes
        self.scale_input = scale_input
        self.scaler_input = scaler_input
        self.reduce_dim_input = reduce_dim_input
        self.dim_reducer_input = dim_reducer_input
        self.scale_output = scale_output
        self.scaler_output = scaler_output
        self.reduce_dim_output = reduce_dim_output

        # Wrap the model and reducer into a pipeline
        self._wrap_model_reducer_in_pipeline()

    def _wrap_model_reducer_in_pipeline(self):
        """Build the final pipeline for every model in ``models_multi``.

        Each model is wrapped in a ``Pipeline`` preceded by the optional
        input scaler and input dimensionality reducer. If output scaling
        and/or output dimensionality reduction is enabled, that pipeline is
        further wrapped in an ``InputOutputPipeline`` together with a
        ``Pipeline`` of the output transformers.

        Returns
        -------
        models_piped : list
            One pipeline per model, also stored as ``self.models_piped``.
        """
        # The preprocessing steps do not depend on the model, so assemble
        # them once. The same transformer objects are placed in every
        # pipeline (they are not copied here).
        input_preprocessing = []
        if self.scale_input:
            input_preprocessing.append(("scaler", self.scaler_input))
        if self.reduce_dim_input:
            input_preprocessing.append(("dim_reducer", self.dim_reducer_input))

        output_steps = []
        if self.scale_output:
            output_steps.append(("scaler_output", self.scaler_output))
        if self.reduce_dim_output:
            output_steps.append(("dim_reducer_output", self.dim_reducer_output))

        self.models_piped = []
        for model in self.models_multi:
            input_pipeline = Pipeline([*input_preprocessing, ("model", model)])

            if output_steps:
                # A fresh output Pipeline (with its own steps list) per model.
                final_model = InputOutputPipeline(
                    regressor=input_pipeline,
                    transformer=Pipeline(list(output_steps)),
                )
                self.models_piped.append(final_model)
            else:
                self.models_piped.append(input_pipeline)
        return self.models_piped

    def _turn_models_into_multioutput(self, y):
        """Turn single output models into multioutput models if y is 2D.

        Reads ``self.models`` and stores the result in ``self.models_multi``.

        Parameters
        ----------
        y : array, shape (n_samples,) or (n_samples, n_outputs)
            Simulation output. Must expose ``ndim`` and ``shape``.

        Returns
        -------
        models_multi : list
            The models in their original order. A model is wrapped in
            ``MultiOutputRegressor`` only when ``y`` has more than one output
            column and the model's ``_more_tags()`` does not report a truthy
            ``"multioutput"`` tag (a missing tag counts as False).
        """
        # The target shape does not depend on the model, so check it once.
        needs_multioutput = y.ndim > 1 and y.shape[1] > 1

        self.models_multi = [
            MultiOutputRegressor(model)
            if needs_multioutput
            and not model._more_tags().get("multioutput", False)
            else model
            for model in self.models
        ]
        return self.models_multi
0 commit comments