|
"""
Morris sensitivity analysis.

Method of Morris, including groups and optimal trajectories
(Morris 1991, Campolongo et al. 2007).
"""
| 6 | + |
from pathlib import Path
from typing import Optional

import numpy as np
import xarray as xr

import SALib
# Explicitly import the analyze submodule: a bare `import SALib` does not
# guarantee that `SALib.analyze.morris` is resolvable as an attribute chain.
import SALib.analyze.morris
from SALib import ProblemSpec
from SALib.sample.morris import sample as morris_sample

from sbmlsim.sensitivity import (
    SensitivityAnalysis,
    SensitivitySimulation,
    SensitivityParameter,
    AnalysisGroup,
)
| 22 | + |
| 23 | + |
| 24 | +class MorrisSensitivityAnalysis(SensitivityAnalysis): |
| 25 | + """Morris sensitivity analysis. |
| 26 | +
|
| 27 | + Campolongo et al., [2] introduces an optimal trajectories approach which attempts to maximize the parameter space scanned for a given number of trajectories (where optimal_trajectories). The approach accomplishes this aim by randomly generating a high number of possible trajectories (500 to 1000 in [2]) and selecting a subset of r trajectories which have the highest spread in parameter space. The r variable in [2] corresponds to the optimal_trajectories parameter here. |
| 28 | +
|
| 29 | + Calculating all possible combinations of trajectories can be computationally expensive. The number of factors makes little difference, but the ratio between number of optimal trajectories and the sample size results in an exponentially increasing number of scores that must be computed to find the optimal combination of trajectories. We suggest going no higher than 4 levels from a pool of 100 samples with this “brute force” approach. |
| 30 | +
|
| 31 | + Ruano et al., [3] proposed an alternative approach with an iterative process that maximizes the distance between subgroups of generated trajectories, from which the final set of trajectories are selected, again maximizing the distance between each. The approach is not guaranteed to produce the most optimal spread of trajectories, but are at least locally maximized and significantly reduce the time taken to select trajectories. With local_optimization = True (which is default), it is possible to go higher than the previously suggested 4 levels from a pool of 100 samples. |
| 32 | +
|
| 33 | + [1] Morris, M.D., 1991. Factorial Sampling Plans for Preliminary Computational Experiments. Technometrics 33, 161-174. https://doi.org/10.1080/00401706.1991.10484804 |
| 34 | + [2] Campolongo, F., Cariboni, J., & Saltelli, A. 2007. An effective screening design for sensitivity analysis of large models. Environmental Modelling & Software, 22(10), 1509-1518. https://doi.org/10.1016/j.envsoft.2006.10.004 |
| 35 | + [3] Ruano, M.V., Ribes, J., Seco, A., Ferrer, J., 2012. An improved sampling strategy based on trajectory design for application of the Morris method to systems with many input factors. Environmental Modelling & Software 37, 103-109. https://doi.org/10.1016/j.envsoft.2012.03.008 |
| 36 | +
|
| 37 | + """ |
| 38 | + |
| 39 | + sensitivity_keys = ["mu", "mu_star", "sigma", "mu_star_conf"] |
| 40 | + |
| 41 | + def __init__( |
| 42 | + self, |
| 43 | + sensitivity_simulation: SensitivitySimulation, |
| 44 | + parameters: list[SensitivityParameter], |
| 45 | + groups: list[AnalysisGroup], |
| 46 | + results_path: Path, |
| 47 | + N: int, |
| 48 | + optimal_trajectories: int, |
| 49 | + num_levels: int = 4, |
| 50 | + local_optimization: bool = True, |
| 51 | + seed: Optional[int] = None, |
| 52 | + n_cores: Optional[int] = None, |
| 53 | + cache_results: bool = False, |
| 54 | + **kwargs, |
| 55 | + ): |
| 56 | + """ |
| 57 | + Resulting simulations are (D+1) * N/T with D number of parameters. |
| 58 | +
|
| 59 | +
|
| 60 | + N (int) – The number of trajectories to generate |
| 61 | + optimal_trajectories - The number of optimal trajectories to sample (between 2 and N) |
| 62 | + num_levels - The number of grid levels to use (should be even) |
| 63 | + local_optimization - Flag whether to use local optimization according to Ruano et al. (2012) Speeds up the process tremendously for bigger N and num_levels. If set to False brute force method is used |
| 64 | + """ |
| 65 | + |
| 66 | + super().__init__( |
| 67 | + sensitivity_simulation=sensitivity_simulation, |
| 68 | + parameters=parameters, |
| 69 | + groups=groups, |
| 70 | + results_path=results_path, |
| 71 | + seed=seed, |
| 72 | + n_cores=n_cores, |
| 73 | + cache_results=cache_results, |
| 74 | + ) |
| 75 | + self.N: int = N |
| 76 | + self.optimal_trajectories: int = optimal_trajectories |
| 77 | + self.num_levels: int = num_levels |
| 78 | + self.local_optimization: bool = local_optimization |
| 79 | + self.prefix = f"morris_levels{self.num_levels}_N{self.N}" |
| 80 | + |
| 81 | + # define the problem specification |
| 82 | + self.ssa_problems: dict[str, ProblemSpec] = {} |
| 83 | + for group in self.groups: |
| 84 | + self.ssa_problems[group.uid] = ProblemSpec( |
| 85 | + { |
| 86 | + "num_vars": self.num_parameters, |
| 87 | + "names": self.parameter_ids, |
| 88 | + "bounds": [[p.lower_bound, p.upper_bound] for p in self.parameters], |
| 89 | + "outputs": self.output_ids, |
| 90 | + } |
| 91 | + ) |
| 92 | + |
| 93 | + def create_samples(self) -> None: |
| 94 | + """Create samples using the Method of Morris. |
| 95 | + Three variants of Morris' sampling for elementary effects is supported: |
| 96 | +
|
| 97 | + - Vanilla Morris (see [1]) |
| 98 | + when ``optimal_trajectories`` is ``None``/``False`` and |
| 99 | + ``local_optimization`` is ``False`` |
| 100 | + - Optimised trajectories when ``optimal_trajectories=True`` using |
| 101 | + Campolongo's enhancements (see [2]) and optionally Ruano's enhancement |
| 102 | + (see [3]) when ``local_optimization=True`` |
| 103 | + - Morris with groups when the problem definition specifies groups of |
| 104 | + parameters |
| 105 | + """ |
| 106 | + # (num_samples x num_outputs) |
| 107 | + for gid in self.group_ids: |
| 108 | + # libssa samples based on definition |
| 109 | + morris_samples = morris_sample( |
| 110 | + self.ssa_problems[gid], |
| 111 | + N=self.N, |
| 112 | + num_levels=self.num_levels, |
| 113 | + optimal_trajectories=self.optimal_trajectories, |
| 114 | + local_optimization=self.local_optimization, |
| 115 | + ) |
| 116 | + self.ssa_problems[gid].set_samples(morris_samples) |
| 117 | + |
| 118 | + self.samples[gid] = xr.DataArray( |
| 119 | + morris_samples, |
| 120 | + dims=["sample", "parameter"], |
| 121 | + coords={ |
| 122 | + "sample": range(morris_samples.shape[0]), |
| 123 | + "parameter": self.parameter_ids, |
| 124 | + }, |
| 125 | + name="samples", |
| 126 | + ) |
| 127 | + |
| 128 | + def calculate_sensitivity( |
| 129 | + self, cache_filename: Optional[str] = None, cache: bool = False |
| 130 | + ): |
| 131 | + """Perform extended Fourier Amplitude Sensitivity Test on model outputs. |
| 132 | +
|
| 133 | + Returns a dictionary with keys 'S1' and 'ST', where each entry is a list of |
| 134 | + size D (the number of parameters) containing the indices in the same order |
| 135 | + as the parameter file. |
| 136 | +
|
| 137 | + Returns a result set with keys mu, mu_star, sigma, and mu_star_conf, where each entry corresponds to the parameters defined in the problem spec or parameter file. |
| 138 | +
|
| 139 | + mu metric indicates the mean of the distribution |
| 140 | + mu_star metric indicates the mean of the distribution of absolute values |
| 141 | + sigma is the standard deviation of the distribution |
| 142 | + """ |
| 143 | + |
| 144 | + data = self.read_cache(cache_filename, cache) |
| 145 | + if data: |
| 146 | + self.sensitivity = data |
| 147 | + return |
| 148 | + |
| 149 | + for gid in self.group_ids: |
| 150 | + Y = self.results[gid].values |
| 151 | + self.ssa_problems[gid].set_results(Y) |
| 152 | + |
| 153 | + # num_parameters x num_outputs |
| 154 | + for key in self.sensitivity_keys: |
| 155 | + self.sensitivity[gid][key] = xr.DataArray( |
| 156 | + np.full((self.num_parameters, self.num_outputs), np.nan), |
| 157 | + dims=["parameter", "output"], |
| 158 | + coords={"parameter": self.parameter_ids, "output": self.output_ids}, |
| 159 | + name=key, |
| 160 | + ) |
| 161 | + |
| 162 | + # Calculate Morris indices |
| 163 | + for ko in range(self.num_outputs): |
| 164 | + Yo = Y[:, ko] |
| 165 | + Si = SALib.analyze.morris.analyze( |
| 166 | + self.ssa_problems[gid], |
| 167 | + Yo, |
| 168 | + scaled=False, |
| 169 | + num_levels=self.num_levels, |
| 170 | + num_resamples=100, |
| 171 | + conf_level=0.95, |
| 172 | + print_to_console=False, |
| 173 | + ) |
| 174 | + for key in self.sensitivity_keys: |
| 175 | + self.sensitivity[gid][key][:, ko] = Si[key] |
| 176 | + |
| 177 | + # write to cache |
| 178 | + self.write_cache( |
| 179 | + data=self.sensitivity, cache_filename=cache_filename, cache=cache |
| 180 | + ) |
| 181 | + |
| 182 | + def plot(self): |
| 183 | + super().plot() |
| 184 | + for kg, group in enumerate(self.groups): |
| 185 | + # morris plots |
| 186 | + for key in ["ST", "S1"]: |
| 187 | + self.plot_sensitivity( |
| 188 | + group_id=group.uid, |
| 189 | + sensitivity_key=key, |
| 190 | + # title=f"{key} {group.name}", |
| 191 | + cutoff=0.05, |
| 192 | + cluster_rows=False, |
| 193 | + cmap="viridis", |
| 194 | + vcenter=0.5, |
| 195 | + vmin=0.0, |
| 196 | + vmax=1.0, |
| 197 | + fig_path=self.results_path |
| 198 | + / f"{self.prefix}_sensitivity_{kg:>02}_{group.uid}_{key}.png", |
| 199 | + ) |
0 commit comments