@@ -20,8 +20,8 @@
 from autoemulate.plotting import _plot_model
 from autoemulate.printing import _print_setup
 from autoemulate.save import ModelSerialiser
-from autoemulate.sensitivity_analysis import plot_sensitivity_analysis
-from autoemulate.sensitivity_analysis import sensitivity_analysis
+from autoemulate.sensitivity_analysis import _plot_sensitivity_analysis
+from autoemulate.sensitivity_analysis import _sensitivity_analysis
 from autoemulate.utils import _check_cv
 from autoemulate.utils import _ensure_2d
 from autoemulate.utils import _get_full_model_name
@@ -33,8 +33,8 @@
 class AutoEmulate:
     """
     The AutoEmulate class is the main class of the AutoEmulate package. It is used to set up and compare
-    different emulator models on a given dataset. It can also be used to save and load models, and to
-    print and plot the results of the comparison.
+    different emulator models on a given dataset. It can also be used to summarise and visualise results,
+    to save and load models, and to run sensitivity analysis.
     """
 
     def __init__(self):
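For context, here is a minimal sketch of the workflow the updated class docstring describes. It assumes the class is importable as `from autoemulate.compare import AutoEmulate` and that `setup()` takes `X` and `y` arrays (the docstrings below reference `setup()` and `self.X`); the toy data is purely illustrative.

```python
import numpy as np

from autoemulate.compare import AutoEmulate

# Toy dataset: 100 samples, 2 inputs, 1 output (illustrative only).
X = np.random.uniform(0, 1, size=(100, 2))
y = np.sin(X[:, 0]) + np.cos(X[:, 1])

ae = AutoEmulate()
ae.setup(X, y)       # must be called before compare()
best = ae.compare()  # cross-validates emulators, returns the best one
```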
@@ -178,12 +178,13 @@ def _get_metrics(self, METRIC_REGISTRY):
         return [metric for metric in METRIC_REGISTRY.values()]
 
     def compare(self):
-        """Compares the emulator models on the data. self.setup() must be run first.
+        """Compares models using cross-validation, with the option
+        to perform hyperparameter search. self.setup() must be run first.
 
         Returns
         -------
         self.best_model : object
-            Best performing model fitted on full data.
+            Emulator with the highest cross-validation R2 score.
         """
         if not self.is_set_up:
             raise RuntimeError("Must run setup() before compare()")
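Per the reworded docstring, `compare()` ranks emulators by mean cross-validation R2 and returns the top one. A sketch of inspecting that ranking via `summarise_cv()`, which appears later in this file:

```python
best = ae.compare()       # emulator with the highest mean CV R2
print(ae.summarise_cv())  # per-model CV metrics, sorted by "r2" by default
```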
@@ -257,7 +258,8 @@ def get_model(self, name=None, rank=1, metric="r2"):
         Parameters
         ----------
         name : str
-            Name of the model to return.
+            Name of the model to return. Can be the full name or a short name, e.g. "GaussianProcess" or "gp".
+            Short names are the first letters of each word in the full name, lowercased (e.g. "GaussianProcess" -> "gp").
         rank : int
             Rank of the model to return. Defaults to 1, which is the best model, 2 is the second best, etc.
         metric : str
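A sketch of the naming convention the new docstring text describes, using the parameters from the `get_model` signature in the hunk header:

```python
gp = ae.get_model("GaussianProcess")           # full name
gp = ae.get_model("gp")                        # equivalent short name
runner_up = ae.get_model(rank=2, metric="r2")  # second-best model by R2
```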
@@ -298,8 +300,7 @@ def get_model(self, name=None, rank=1, metric="r2"):
         return chosen_model
 
     def refit(self, model=None):
-        """Refits model on full data.
-
+        """Refits model on full data. This is useful, as `compare()` runs only on the training data.
         Parameters
         ----------
         model : model to refit.
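A sketch of the flow the added sentence alludes to: `compare()` only sees the training split, so refit before using an emulator downstream. It assumes `refit()` returns the refitted model, which the docstring implies but does not state.

```python
gp = ae.get_model("gp")  # fitted on training data only
gp = ae.refit(gp)        # refit on the full dataset before downstream use
```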
@@ -359,7 +360,7 @@ def load(self, path=None):
         return serialiser._load_model(path)
 
     def print_setup(self):
-        """Print the setup of the AutoEmulate object."""
+        """Print the parameters of the AutoEmulate object."""
         _print_setup(self)
 
     def summarise_cv(self, model=None, sort_by="r2"):
@@ -408,17 +409,17 @@ def plot_cv(
             If a model name is specified, plots all folds of that model.
         style : str, optional
             The type of plot to draw:
-            "Xy" observed and predicted values vs. features, including 2σ error bands where available (default).
-            "actual_vs_predicted" draws the observed values (y-axis) vs. the predicted values (x-axis) (default).
-            "residual_vs_predicted" draws the residuals, i.e. difference between observed and predicted values, (y-axis) vs. the predicted values (x-axis).
+            "Xy" for plotting observed and predicted values vs. features, including 2σ error bands where available (default).
+            "actual_vs_predicted" for plotting observed values (y-axis) vs. the predicted values (x-axis).
+            "residual_vs_predicted" for plotting the residuals, i.e. difference between observed and predicted values, (y-axis) vs. the predicted values (x-axis).
         n_cols : int
             Number of columns in the plot grid.
         figsize : tuple, optional
-            Overrides the default figure size.
+            Overrides the default figure size, in inches, e.g. (6, 4).
         output_index : int
-            Index of the output to plot. Default is 0.
+            Index of the output to plot. Default is 0. Can be a single index or a list of indices.
         input_index : int
-            Index of the input to plot. Default is 0.
+            Index of the input to plot. Default is 0. Can be a single index or a list of indices.
         """
         model_name = (
             _get_full_model_name(model, self.model_names) if model is not None else None
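A sketch exercising the documented `plot_cv` options; the index values and model name are illustrative:

```python
# Default "Xy" style, two inputs against the first output.
ae.plot_cv(style="Xy", output_index=0, input_index=[0, 1], figsize=(6, 4))

# All CV folds of a single model, observed vs. predicted.
ae.plot_cv(model="gp", style="actual_vs_predicted")
```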
@@ -440,6 +441,8 @@ def evaluate(self, model=None, multioutput="uniform_average"):
         """
         Evaluates the model on the test set.
 
+        Test set size can be specified in `setup()` with `test_set_size`.
+
         Parameters
         ----------
         model : object
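A sketch tying the new note back to `setup()`. The assumption that `test_set_size` is a fraction of the data is mine, not the diff's; check the `setup()` docstring for the exact semantics.

```python
ae.setup(X, y, test_set_size=0.2)  # hold out data for evaluation (assumed fraction)
ae.compare()
print(ae.evaluate())               # metrics on the held-out test set
```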
@@ -498,7 +501,7 @@ def plot_eval(
         output_index=0,
         input_index=0,
     ):
-        """Visualise different model evaluations on the test set.
+        """Visualise model predictive performance on the test set.
 
         Parameters
         ----------
@@ -534,28 +537,60 @@ def sensitivity_analysis(
     ):
         """Perform Sobol sensitivity analysis on a fitted emulator.
 
+        Sobol sensitivity analysis is a variance-based method that decomposes the variance of the model
+        output into contributions from individual input parameters and their interactions. It calculates:
+        - First-order indices (S1): Direct contribution of each input parameter
+        - Second-order indices (S2): Contribution from pairwise interactions between parameters
+        - Total-order indices (ST): Total contribution of a parameter, including all its interactions
+
         Parameters
         ----------
         model : object, optional
             Fitted model. If None, uses the best model from cross-validation.
         problem : dict, optional
-            The problem definition, including 'num_vars', 'names', and 'bounds', optional 'output_names'.
-            If None, the problem is generated from X using minimum and maximum values of the features as bounds.
+            The problem definition dictionary. If None, the problem is generated from X using
+            minimum and maximum values of the features as bounds. The dictionary should contain:
+
+            - 'num_vars': Number of input variables (int)
+            - 'names': List of variable names (list of str)
+            - 'bounds': List of [min, max] bounds for each variable (list of lists)
+            - 'output_names': Optional list of output names (list of str)
+
+            Example::
 
-            Example:
-            ```python
                 problem = {
                     "num_vars": 2,
                     "names": ["x1", "x2"],
                     "bounds": [[0, 1], [0, 1]],
+                    "output_names": ["y1", "y2"]  # optional
                 }
-            ```
         N : int, optional
-            Number of samples to generate. Default is 1024.
+            Number of samples to generate for the analysis. Higher values give more accurate
+            results but increase computation time. Default is 1024.
         conf_level : float, optional
-            Confidence level for the confidence intervals. Default is 0.95.
+            Confidence level (between 0 and 1) for calculating confidence intervals of the
+            sensitivity indices. Default is 0.95 (95% confidence).
         as_df : bool, optional
-            If True, return a long-format pandas DataFrame (default is True).
+            If True, returns results as a long-format pandas DataFrame with columns for
+            parameters, sensitivity indices, and confidence intervals. If False, returns
+            the raw SALib results dictionary. Default is True.
+
+        Returns
+        -------
+        pandas.DataFrame or dict
+            If as_df=True (default), returns a DataFrame with columns:
+
+            - 'parameter': Input parameter name
+            - 'output': Output variable name
+            - 'S1', 'S2', 'ST': First, second, and total order sensitivity indices
+            - 'S1_conf', 'S2_conf', 'ST_conf': Confidence intervals for each index
+
+            If as_df=False, returns the raw SALib results dictionary.
+
+        Notes
+        -----
+        The analysis requires N * (2D + 2) model evaluations, where D is the number of input
+        parameters. For example, with N=1024 and 5 parameters, this requires 12,288 evaluations.
         """
         if model is None:
             if not hasattr(self, "best_model"):
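A sketch of a full call using the documented `problem` and `N` parameters, with the evaluation-count arithmetic from the new Notes section:

```python
problem = {
    "num_vars": 2,
    "names": ["x1", "x2"],
    "bounds": [[0, 1], [0, 1]],
}
si = ae.sensitivity_analysis(problem=problem, N=256)

# Cost per the Notes: N * (2D + 2) evaluations.
# Here: 256 * (2 * 2 + 2) = 1536 emulator evaluations — cheap, since
# the emulator stands in for the expensive simulation.
```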
@@ -565,7 +600,7 @@ def sensitivity_analysis(
                 f"No model provided, using {get_model_name(model)}, which had the highest average cross-validation score, refitted on full data."
             )
 
-        Si = sensitivity_analysis(model, problem, self.X, N, conf_level, as_df)
+        Si = _sensitivity_analysis(model, problem, self.X, N, conf_level, as_df)
         return Si
 
     def plot_sensitivity_analysis(self, results, index="S1", n_cols=None, figsize=None):
@@ -588,4 +623,4 @@ def plot_sensitivity_analysis(self, results, index="S1", n_cols=None, figsize=None):
             Figure size as (width, height) in inches. If None, automatically calculated.
 
         """
-        return plot_sensitivity_analysis(results, index, n_cols, figsize)
+        return _plot_sensitivity_analysis(results, index, n_cols, figsize)
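Finally, a sketch chaining the two renamed entry points, with defaults as documented above:

```python
si = ae.sensitivity_analysis()                # best model, N=1024, as_df=True
ae.plot_sensitivity_analysis(si, index="ST")  # plot total-order indices
```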