Convergence testing with distance travelled #467

Open · wants to merge 7 commits into base: master
46 changes: 39 additions & 7 deletions py/dynesty/dynamicsampler.py
@@ -604,6 +604,9 @@ def _configure_batch_sampler(main_sampler,
saved_logvol = np.array(main_sampler.saved_run['logvol'])
saved_scale = np.array(main_sampler.saved_run['scale'])
saved_blobs = np.array(main_sampler.saved_run['blob'])
saved_distances_indices = np.array(main_sampler.saved_run["distance_insertion_index"])
saved_log_distance_ratios = np.array(main_sampler.saved_run["log_distance_ratio"])
saved_likelihood_indices = np.array(main_sampler.saved_run["likelihood_insertion_index"])
first_points = []

# This will be a list of first points yielded from
@@ -721,6 +724,9 @@ def _configure_batch_sampler(main_sampler,
logl_min = -np.inf

live_scale = saved_scale[subset0[0]]
live_distance_index = saved_distances_indices[subset0[0]]
live_log_distance_ratio = saved_log_distance_ratios[subset0[0]]
live_likelihood_index = saved_likelihood_indices[subset0[0]]
# set the scale based on the lowest point

# we are weighting each point by X_i to ensure
@@ -771,6 +777,9 @@ def _configure_batch_sampler(main_sampler,
batch_sampler.live_logl = live_logl
batch_sampler.scale = live_scale
batch_sampler.live_blobs = live_blobs
batch_sampler.distance_insertion_index = live_distance_index
batch_sampler.log_distance_ratio = live_log_distance_ratio
batch_sampler.likelihood_insertion_index = live_likelihood_index

batch_sampler.update_bound_if_needed(logl_min)
# Trigger an update of the internal bounding distribution based
@@ -1105,7 +1114,8 @@ def results(self):
('bound_iter', np.array(self.saved_run['bounditer'])))
results.append(
('samples_bound', np.array(self.saved_run['boundidx'])))
results.append(('scale', np.array(self.saved_run['scale'])))
for key in ['scale', 'distance_insertion_index', 'log_distance_ratio', 'likelihood_insertion_index']:
results.append((key, np.array(self.saved_run[key])))

return Results(results)

@@ -1358,7 +1368,11 @@ def sample_initial(self,
blob=results.blob,
boundidx=results.boundidx,
bounditer=results.bounditer,
scale=self.sampler.scale)
scale=self.sampler.scale,
distance_insertion_index=self.sampler.distance_insertion_index,
log_distance_ratio=self.sampler.log_distance_ratio,
likelihood_insertion_index=self.sampler.likelihood_insertion_index,
)

self.base_run.append(add_info)
self.saved_run.append(add_info)
@@ -1403,7 +1417,11 @@ def sample_initial(self,
n=self.nlive_init - it,
boundidx=results.boundidx,
bounditer=results.bounditer,
scale=self.sampler.scale)
scale=self.sampler.scale,
distance_insertion_index=-1,
log_distance_ratio=-1,
likelihood_insertion_index=-1,
)

self.base_run.append(add_info)
self.saved_run.append(add_info)
@@ -1611,7 +1629,11 @@ def sample_batch(self,
n=nlive_new,
boundidx=results.boundidx,
bounditer=results.bounditer,
scale=batch_sampler.scale)
scale=batch_sampler.scale,
distance_insertion_index=batch_sampler.distance_insertion_index,
log_distance_ratio=batch_sampler.log_distance_ratio,
likelihood_insertion_index=batch_sampler.likelihood_insertion_index,
)
self.new_run.append(D)
# Increment relevant counters.
self.ncall += results.nc
@@ -1660,7 +1682,11 @@ def sample_batch(self,
blob=results.blob,
boundidx=results.boundidx,
bounditer=results.bounditer,
scale=batch_sampler.scale)
scale=batch_sampler.scale,
distance_insertion_index=-1,
log_distance_ratio=-1,
likelihood_insertion_index=-1,
)
self.new_run.append(D)

# Increment relevant counters.
@@ -1693,7 +1719,10 @@ def combine_runs(self):

for k in [
'id', 'u', 'v', 'logl', 'nc', 'boundidx', 'it', 'bounditer',
'n', 'scale', 'blob', 'logvol'
'n', 'scale', 'blob', 'logvol',
'distance_insertion_index',
'log_distance_ratio',
'likelihood_insertion_index',
]:
saved_d[k] = np.array(self.saved_run[k])
new_d[k] = np.array(self.new_run[k])
@@ -1745,7 +1774,10 @@ def combine_runs(self):

for k in [
'id', 'u', 'v', 'logl', 'nc', 'boundidx', 'it',
'bounditer', 'scale', 'blob'
'bounditer', 'scale', 'blob',
'distance_insertion_index',
'log_distance_ratio',
'likelihood_insertion_index',
]:
add_info[k] = add_source[k][add_idx]
self.saved_run.append(add_info)
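The dynamicsampler changes above thread three new per-sample fields (`distance_insertion_index`, `log_distance_ratio`, `likelihood_insertion_index`) through `sample_initial`, `sample_batch`, and `combine_runs`, storing `-1` as a sentinel wherever a point is recycled from the final live set and no index is computed. A minimal sketch of how a downstream consumer might pull these fields out of a finished run and drop the sentinels (assuming this branch; the helper below is illustrative, not part of the PR):

```python
import numpy as np


def valid_insertion_indices(results):
    """Extract the new diagnostics from a dynesty Results object,
    dropping the -1 sentinels stored for final live points."""
    likelihood = np.asarray(results['likelihood_insertion_index'])
    distance = np.asarray(results['distance_insertion_index'])
    log_ratio = np.asarray(results['log_distance_ratio'])
    # -1 marks points for which no insertion index was computed.
    keep = (likelihood >= 0) & (distance >= 0)
    return likelihood[keep], distance[keep], log_ratio[keep]
```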
32 changes: 31 additions & 1 deletion py/dynesty/plotting.py
@@ -18,6 +18,7 @@
from .utils import resample_equal, unitcheck
from .utils import quantile as _quantile
from .utils import get_random_generator, get_nonbounded
from .utils import insertion_index_test
from . import bounding

str_type = str
@@ -26,7 +27,7 @@

__all__ = [
"runplot", "traceplot", "cornerpoints", "cornerplot", "boundplot",
"cornerbound", "_hist2d"
"cornerbound", "_hist2d", "insertionplot",
]


@@ -2391,3 +2392,32 @@ def _hist2d(x,

ax.set_xlim(span[0])
ax.set_ylim(span[1])

def insertionplot(results, figsize=(8, 5)):
"""
Plot the fractional insertion indices for likelihood and distance.

A trace is added for each unique number of live points.
The p-value comparing the observed insertion indices with the expected
uniform distribution is shown in the legend.

Parameters
----------
results : :class:`~dynesty.results.Results` instance
A :class:`~dynesty.results.Results` instance from a nested
sampling run.

Returns
-------
insertionplot : (`~matplotlib.figure.Figure`, `~matplotlib.axes.Axes`)
Output insertion index plot.
"""
fig = pl.figure(figsize=figsize)
ax = pl.gca()
for kind in ["likelihood", "distance"]:
insertion_index_test(result=results, kind=kind, ax=ax)
ax.set_ylabel("Insertion index")
ax.set_xlim(0, 1)
ax.legend(loc="best")
ax.set_xlabel("Index / $n_{\\rm live}$")
return fig, ax
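As a usage note, the new `insertionplot` takes the results of any finished run. A short example on a toy Gaussian likelihood (the sampler setup is illustrative; only the `insertionplot` call is from this PR):

```python
import numpy as np
import dynesty
from dynesty import plotting as dyplot

ndim = 3


def loglike(x):
    # Standard Gaussian toy likelihood.
    return -0.5 * np.sum(x ** 2)


def prior_transform(u):
    # Map the unit cube to a flat prior on [-5, 5] per dimension.
    return 10.0 * u - 5.0


sampler = dynesty.NestedSampler(loglike, prior_transform, ndim, nlive=500)
sampler.run_nested()
fig, ax = dyplot.insertionplot(sampler.results)
fig.savefig("insertion_indices.png")
```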
77 changes: 72 additions & 5 deletions py/dynesty/sampler.py
@@ -14,7 +14,8 @@
from .results import Results, print_fn
from .bounding import UnitCube
from .sampling import sample_unif, SamplerArgument
from .utils import (get_seed_sequence, get_print_func, progress_integration,
from .utils import (get_random_generator, get_seed_sequence,
get_print_func, progress_integration,
IteratorResult, RunRecord, get_neff_from_logwt,
compute_integrals, DelayTimer, _LOWL_VAL)

@@ -104,6 +105,9 @@ def __init__(self,

# set to none just for qa
self.scale = None
self.distance_insertion_index = -1
self.log_distance_ratio = -1
self.likelihood_insertion_index = -1
self.method = None
self.kwargs = {}

@@ -261,7 +265,8 @@ def results(self):
dtype=int)))
results.append(('samples_bound',
np.array(self.saved_run['boundidx'], dtype=int)))
results.append(('scale', np.array(self.saved_run['scale'])))
for key in ['scale', 'distance_insertion_index', 'log_distance_ratio', 'likelihood_insertion_index']:
results.append((key, np.array(self.saved_run.D[key])))

return Results(results)

@@ -395,6 +400,52 @@ def _fill_queue(self, loglstar):
rseed=seeds[i],
kwargs=self.kwargs))
self.queue = list(mapper(evolve_point, args))
for ii, start in enumerate(point_queue):
blob = self.queue[ii][-1]
if (
isinstance(blob, dict)
and not self.unit_cube_sampling
and "start" not in blob
):
blob["start"] = start

def _distance_insertion_index(self, start, point):
r"""
Compute the distance insertion index.
This is entirely analogous to the likelihood insertion index, except that we
use the distance from the start point instead of the likelihood. This method
is more robust to multimodal posteriors and likelihood plateaus.
We define the distance as

.. math::

d = \sqrt{\sum_i \frac{(x_i - x_{i,0})^2}{\sigma_i^2}}

where :math:`x_{i,0}` is the :math:`i`-th coordinate of the starting point and
:math:`\sigma_i` is the standard deviation of the live points along the
:math:`i`-th axis.
"""
norms = np.std(self.live_u, axis=0)
distance = np.linalg.norm((point - start) / norms)
all_distances = np.array([np.linalg.norm((start - u) / norms) for u in self.live_u])
idx = sum(all_distances < distance)
if distance == 0:
log_ratio = np.inf
else:
rstate = get_random_generator(self.rstate)
other = self.live_u[rstate.choice(len(self.live_u))]
other_distance = np.linalg.norm((other - start) / norms)
while other_distance == 0:
other = self.live_u[rstate.choice(len(self.live_u))]
other_distance = np.linalg.norm((other - start) / norms)
alt_distance = np.linalg.norm((point - other) / norms)
log_ratio = self.ndim * (np.log(other_distance / distance) + np.log(other_distance / alt_distance)) / 2
return log_ratio, idx

def _likelihood_insertion_index(self, logl):
"""
Compute the likelihood insertion index as defined in arxiv:2006.03371
"""
return sum(np.array(self.live_logl) < logl)

def _get_point_value(self, loglstar):
"""Grab the first live point proposal in the queue."""
@@ -421,6 +472,9 @@ def _new_point(self, loglstar):
u, v, logl, nc, blob = self._get_point_value(loglstar)
ncall += nc
ncall_accum += nc
if blob is not None:
self.distance_insertion_index = blob.get("distance_insertion", 0)
self.likelihood_insertion_index = blob.get("likelihood_insertion", 0)

if blob is not None and not self.unit_cube_sampling:
# If our queue is empty, update any tuning parameters
@@ -429,6 +483,8 @@
# If it's not empty we are just accumulating
# the history of evaluations
self.update_proposal(blob, update=self.nqueue <= 0)
self.log_distance_ratio, self.distance_insertion_index = self._distance_insertion_index(blob["start"], u)
self.likelihood_insertion_index = self._likelihood_insertion_index(logl)

# the reason I'm not using self.ncall is that it's updated at
# higher level
@@ -558,7 +614,11 @@ def add_live_points(self):
it=point_it,
bounditer=bounditer,
scale=self.scale,
blob=old_blob))
blob=old_blob,
distance_insertion_index=-1,
log_distance_ratio=-1,
likelihood_insertion_index=-1,
))
self.eff = 100. * (self.it + i) / self.ncall # efficiency

# Return our new "dead" point and ancillary quantities.
@@ -589,7 +649,10 @@ def _remove_live_points(self):
for k in [
'id', 'u', 'v', 'logl', 'logvol', 'logwt', 'logz',
'logzvar', 'h', 'nc', 'boundidx', 'it', 'bounditer',
'scale', 'blob'
'scale', 'blob',
'distance_insertion_index',
'log_distance_ratio',
'likelihood_insertion_index',
]:
del self.saved_run[k][-self.nlive:]
else:
@@ -879,7 +942,11 @@ def sample(self,
it=worst_it,
bounditer=bounditer,
scale=self.scale,
blob=old_blob))
blob=old_blob,
distance_insertion_index=self.distance_insertion_index,
log_distance_ratio=self.log_distance_ratio,
likelihood_insertion_index=self.likelihood_insertion_index,
))

# Update the live point (previously our "worst" point).
self.live_u[worst] = u
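To make the statistic in `_distance_insertion_index` concrete, here is a self-contained sketch of the ranking step it performs, following the docstring definition of the scaled distance. This is a standalone reimplementation for illustration only; the PR's method additionally returns the `log_distance_ratio` diagnostic:

```python
import numpy as np


def distance_insertion_index(live_u, start, point):
    # Scale each axis by the live-point standard deviation, per the
    # docstring: d = sqrt(sum_i (x_i - x_{i,0})^2 / sigma_i^2).
    norms = np.std(live_u, axis=0)
    distance = np.linalg.norm((point - start) / norms)
    live_distances = np.linalg.norm((live_u - start) / norms, axis=1)
    # Rank of the new point's distance among the current live points.
    return int(np.sum(live_distances < distance))


rng = np.random.default_rng(0)
live = rng.uniform(size=(500, 2))  # mock unit-cube live points
idx = distance_insertion_index(live, live[0], rng.uniform(size=2))
```

Under well-mixed sampling these indices should be approximately uniform over `[0, nlive)`, which is what `insertion_index_test` checks; a systematic pile-up near zero suggests chains that have not moved far from their start points.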