posterior diagnostic demo

sambit-giri · sambit-giri · commit d9bf3da9d947 · 2026-04-21T10:56:48.000+02:00
diff --git a/docs/changelog.rst b/docs/changelog.rst
@@ -2,10 +2,39 @@
 Changelog
 =========
 
+v2.4
+----
+* ``DistributionDiagnostic`` class family for comparing and diagnosing probability distributions.
+* ``SampledDistribution``: diagnose distributions from sample arrays (MCMC chains, Monte Carlo draws, bootstrap replicates); supports importance weights.
+* ``GriddedProbabilities``: same interface for distributions on a regular N-D probability grid.
+* Corner plots (``corner`` and ``getdist`` backends) with default 68% and 95% contours; mixed-dimensionality overlays supported.
+* Forest plots showing 68% and 95% credible intervals across distributions.
+* Calibration metrics: Z-score, PIT, bias, RMSE, Mahalanobis distance, coverage.
+* ``fftconvolve`` now handles arrays of unequal shape.
+
+v2.3
+----
+* GPU-accelerated topology: Euler characteristics via PyTorch, with Apple M-chip (MPS) support.
+* Radio telescope sensitivity: SEFD tables, SKA-Low Bessel primary beam, UV mapping in Lagrangian space, uniform weighting, spectral-leakage suppression.
+* Astrophysical data: fesc LyA constraints, Qin+2025 MAP reionization model, reionization observational constraints.
+* Zarr file format support.
+* Noise lightcone fixes (double ``jansky_2_kelvin`` call, decreasing-redshift input).
+* ``fftconvolve`` moved to dedicated ``fft_functions.py``.
+
+v2.2
+----
+* Bispectrum and integrated bispectrum estimators.
+* Multiple SKA layouts (AA1, AA2, AA*, AA4) with antenna-wise gain modelling.
+* UV track simulation speed-up (×10).
+* ViteBetti topology with Cython acceleration.
+* py21cmfast interface for dark-matter halo retrieval.
+* Landy-Szalay correlation function estimator.
+* Migrated build system to ``pyproject.toml``; ``scipy`` version compatibility layer.
+
 v2.1
 ----
 * Modules to analyse 21 cm images added.
-* Compatible with python 3 only
+* Compatible with python 3 only.
 
 v1.1
 ----
diff --git a/notebooks/posterior_diagnostic_demo.ipynb b/notebooks/posterior_diagnostic_demo.ipynb
diff --git a/pyproject.toml b/pyproject.toml
@@ -9,7 +9,7 @@ build-backend = "setuptools.build_meta"
 
 [tool.poetry]
 name = "tools21cm"
-version = "2.3.9"
+version = "2.4.0"
 description = "A package providing tools to analyse cosmological simulations of reionization"
 authors = ["Sambit Giri <sambit.giri@gmail.com>"]
 license = "MIT"
diff --git a/setup.py b/setup.py
@@ -36,7 +36,7 @@
 
 setup(
     name='tools21cm',
-    version='2.3.10',
+    version='2.4.0',
     author='Sambit Giri',
     author_email='sambit.giri@gmail.com',
     packages=find_packages(where="src"),
diff --git a/src/tools21cm/plotting.py b/src/tools21cm/plotting.py
@@ -1,12 +1,25 @@
+from abc import ABC, abstractmethod
 import numpy as np
-from . import xfrac_file
-from . import density_file
-from . import conv
-from .helper_functions import get_data_and_type
-
 import matplotlib.pyplot as plt
 import matplotlib.lines as mlines
 from matplotlib import colors as mcolors
+from scipy.linalg import inv
+from scipy.stats import entropy
+import pandas as pd
+
+# External dependencies for corner and mcmc plots
+try:
+    import corner
+except ImportError:
+    corner = None
+
+try:
+    from getdist import plots, MCSamples
+except ImportError:
+    plots, MCSamples = None, None
+
+from . import conv
+from .helper_functions import get_data_and_type
 
 def plot_slice(data, los_axis = 0, slice_num = 0, logscale = False, **kwargs):
     '''
@@ -249,24 +262,283 @@ def plot_triangle(samples_dict, weights_dict=None,
         c.set_plot_config(PlotConfig(bins=bins, extents=extents, smooth=smooth))
         return c.plotter.plot(**kwargs)
         
-if __name__ == '__main__':
-    import tools21cm as t2c
-    import pylab as pl
-    
-    t2c.set_verbose(True)
-    
-    pl.figure()
-    
-    dfilename = '/disk/sn-12/garrelt/Science/Simulations/Reionization/C2Ray_WMAP5/114Mpc_WMAP5/coarser_densities/nc256_halos_removed/6.905n_all.dat'
-    xfilename = '/disk/sn-12/garrelt/Science/Simulations/Reionization/C2Ray_WMAP5/114Mpc_WMAP5/114Mpc_f2_10S_256/results_ranger/xfrac3d_8.958.bin'
-    
-    dfile = t2c.DensityFile(dfilename)
-#    plot_slice(dfile, los_axis=1, logscale=True, cmap=pl.cm.hot)
-#    ax2 = pl.subplot(1,2,2)
-#    plot_slice(xfilename)
-    plot_slice(t2c.XfracFile(xfilename))
-    pl.show()
-    
-    
-    
-    
+class DistributionDiagnostic(ABC):
+    r"""
+    Base class for diagnosing and comparing probability distributions.
+
+    Subclasses implement :meth:`add_distribution`, which is expected to store,
+    per label, a dict in ``self.distributions`` holding at least ``'points'``
+    (2-D array, one column per parameter), ``'weights'``, ``'color'`` and
+    ``'stats'`` (the output of :meth:`_calculate_base_metrics`); the plotting
+    methods below read exactly those keys.
+
+    Attributes:
+        backend (str): 'corner' or 'getdist' for multidimensional plots.
+        true_values (list): Ground truth values for computing diagnostic metrics.
+        param_labels (list): LaTeX labels for the parameters (e.g., [r'\Omega_m']).
+        distributions (dict): Dictionary storing distribution data and stats.
+        fallback_colors (list): Colors used for distributions added without one.
+    """
+    # Display label (mathtext where applicable) for each metric key.
+    _METRIC_LABELS = {
+        'Z':           r'$Z_p = |\mu_p - \theta_{\mathrm{truth},p}|\,/\,\sigma_p$',
+        'PIT':         r'$F_p(\theta_{\mathrm{truth},p})$',
+        'Bias':        r'$\tilde{\theta}_p - \theta_{\mathrm{truth},p}$',
+        'CI68':        r'$\Delta_{68,p}$',
+        'Mahalanobis': r'$D_M$',
+        'KL':          r'$D_{KL}$ (bits)',
+        'RMSE':        r'RMSE',
+        'Cover_68':    'Cover 68%',
+        'Cover_95':    'Cover 95%',
+    }
+    # NOTE(review): presumably the metrics reported once per parameter rather
+    # than once per distribution -- not referenced inside this class; confirm.
+    _PER_PARAM = {'Z', 'PIT', 'Bias', 'CI68'}
+    # NOTE(review): presumably the value each metric takes for a perfectly
+    # calibrated distribution -- not referenced inside this class; confirm.
+    _IDEAL_VALUES = {
+        'Z': 0.0, 'PIT': 0.5, 'Bias': 0.0, 'Mahalanobis': 1.0,
+        'KL': 0.0, 'RMSE': 0.0
+    }
+
+    def __init__(self, backend='corner', true_values=None, param_labels=None):
+        """Store the configuration and start with an empty set of distributions.
+
+        Args:
+            backend (str): Backend name for :meth:`plot_corner`; stored lower-cased.
+            true_values: Optional sequence of ground-truth parameter values.
+            param_labels: Optional LaTeX labels without the surrounding ``$``
+                (the corner backend adds them); missing ones are generated.
+        """
+        self.backend = backend.lower()
+        self.true_values = true_values
+        self.param_labels = param_labels  # Can be None; will be generated dynamically if needed
+        self.distributions = {}
+
+        # Priority on C0-C9 for clarity, then tab20 for density
+        self.fallback_colors = [f'C{i}' for i in range(10)] + \
+                               [plt.get_cmap('tab20')(i) for i in range(20)]
+
+    def _get_default_param_labels(self, num_params):
+        r"""Return at least ``num_params`` labels, generating ``\theta_{i}`` as needed.
+
+        If ``param_labels`` is None every label is generated; if it is too
+        short it is padded with generated labels; otherwise it is returned
+        unchanged (it may then be longer than ``num_params``).
+        """
+        if self.param_labels is None:
+            return [r"\theta_{%d}" % (i + 1) for i in range(num_params)]
+        if len(self.param_labels) < num_params:
+            generated = [r"\theta_{%d}" % (i + 1)
+                         for i in range(len(self.param_labels), num_params)]
+            return list(self.param_labels) + generated
+        return self.param_labels
+
+    def _get_distribution_label(self, label):
+        """Returns provided label or generates 'Distribution N'."""
+        if label is not None:
+            return label
+        return "Distribution %d" % (len(self.distributions) + 1)
+
+    @abstractmethod
+    def add_distribution(self, data, label=None, color=None):
+        """Must be implemented by subclasses."""
+
+    def _calculate_base_metrics(self, points, weights):
+        """Compute weighted summary statistics and, if a truth is set, calibration metrics.
+
+        Args:
+            points: Array-like of shape (n_points, n_params).
+            weights: Array-like of length n_points; normalised here to sum to one.
+
+        Returns:
+            dict: Always contains ``means``, ``sigmas``, ``cov`` (always 2-D)
+            and ``cis`` (one dict per parameter with ``median``, ``lo1``/``hi1``
+            = 16%/84% quantiles, ``lo2``/``hi2`` = 2.5%/97.5% quantiles and a
+            ``cdf_at`` callable). When ``true_values`` is set and non-empty it
+            also contains ``z_scores``, ``rmse``, ``mahalanobis``, ``pit``,
+            ``cover_68``, ``cover_95`` and ``entropy``. Truth-based metrics
+            cover the first ``min(len(true_values), n_params)`` parameters.
+        """
+        points = np.asarray(points, dtype=float)
+        weights = np.asarray(weights, dtype=float)
+        weights_norm = weights / np.sum(weights)
+        num_params = points.shape[1]
+
+        means = np.average(points, axis=0, weights=weights_norm)
+        # np.cov collapses to a 0-d array for a single parameter; keep it 2-D
+        # so that np.diag, inv and det below work for any parameter count.
+        cov = np.atleast_2d(np.cov(points.T, aweights=weights_norm))
+        sigmas = np.sqrt(np.diag(cov))
+
+        cis = []
+        for p in range(num_params):
+            data_p = points[:, p]
+            idx = np.argsort(data_p)
+            sorted_data = data_p[idx]
+            sorted_weights = weights_norm[idx]
+            # Weighted empirical CDF evaluated at the sorted sample values.
+            cum_weights = np.cumsum(sorted_weights)
+
+            # Default arguments bind this parameter's arrays to the closures.
+            def quantile(q, _cw=cum_weights, _sd=sorted_data): return float(np.interp(q, _cw, _sd))
+            def cdf_at(val, _cw=cum_weights, _sd=sorted_data): return float(np.interp(val, _sd, _cw))
+
+            cis.append({
+                'median': quantile(0.500),
+                'lo1': quantile(0.160), 'hi1': quantile(0.840),
+                'lo2': quantile(0.025), 'hi2': quantile(0.975),
+                'cdf_at': cdf_at,
+            })
+
+        metrics = {'means': means, 'sigmas': sigmas, 'cov': cov, 'cis': cis}
+
+        if self.true_values is None:
+            return metrics
+
+        # Handle variable parameter counts in both directions: extra truths are
+        # dropped, and if fewer truths than parameters are given only the
+        # leading parameters are compared (no silent broadcasting).
+        tv_slice = np.asarray(self.true_values, dtype=float)[:num_params]
+        n_tv = len(tv_slice)
+        if n_tv == 0:
+            return metrics
+
+        delta = means[:n_tv] - tv_slice
+        metrics['z_scores'] = np.abs(delta) / sigmas[:n_tv]
+        metrics['rmse'] = np.sqrt(np.mean(delta**2))
+
+        try:
+            metrics['mahalanobis'] = float(np.sqrt(delta @ inv(cov[:n_tv, :n_tv]) @ delta))
+        except (np.linalg.LinAlgError, ValueError):
+            # Singular or non-finite covariance: the distance is undefined.
+            metrics['mahalanobis'] = np.nan
+
+        metrics['pit'] = np.array([cis[p]['cdf_at'](tv_slice[p]) for p in range(n_tv)])
+        metrics['cover_68'] = np.array([cis[p]['lo1'] <= tv_slice[p] <= cis[p]['hi1'] for p in range(n_tv)])
+        metrics['cover_95'] = np.array([cis[p]['lo2'] <= tv_slice[p] <= cis[p]['hi2'] for p in range(n_tv)])
+
+        # Info gain proxy (relative to unit volume): 0.5 * ln det(2*pi*e*cov)
+        try:
+            metrics['entropy'] = 0.5 * np.log(np.linalg.det(2 * np.pi * np.e * cov))
+        except (np.linalg.LinAlgError, ValueError):
+            metrics['entropy'] = np.nan
+
+        return metrics
+
+    def plot_corner(self, levels=None, **kwargs):
+        """Corner/triangle plot for all added distributions.
+
+        Args:
+            levels: Contour levels as probability fractions. Default: [0.68, 0.95].
+            **kwargs: Passed to the backend (corner or getdist).
+
+        Returns:
+            Whatever the selected backend method returns (a figure for
+            'corner', the getdist plotter for 'getdist').
+
+        Raises:
+            ValueError: If no distribution was added or ``backend`` is not
+                'corner' or 'getdist'.
+        """
+        if not self.distributions:
+            raise ValueError("No distributions added.")
+        if levels is None:
+            levels = [0.68, 0.95]
+
+        if self.backend == 'corner':
+            return self._plot_corner_backend(levels=levels, **kwargs)
+        if self.backend == 'getdist':
+            return self._plot_getdist_backend(levels=levels, **kwargs)
+        # Fail loudly instead of silently returning None for a typo'd backend.
+        raise ValueError("Unknown backend %r; expected 'corner' or 'getdist'." % self.backend)
+
+    def _plot_corner_backend(self, levels=None, **kwargs):
+        """Draw all distributions with ``corner.corner`` and return the figure.
+
+        Distributions with the maximum number of parameters are drawn directly
+        on the shared figure; lower-dimensional ones are drawn on a temporary
+        figure whose artists are moved into the top-left sub-panels.
+
+        Raises:
+            ImportError: If the ``corner`` package is not installed.
+        """
+        if corner is None: raise ImportError("Please install 'corner'.")
+        if levels is None:
+            levels = [0.68, 0.95]
+        fig = None
+        handle_map = {}  # orig insertion index → legend handle
+
+        max_params = max(d['points'].shape[1] for d in self.distributions.values())
+        full_labels = [f"${lab}$" for lab in self._get_default_param_labels(max_params)]
+
+        # Render full-dim distributions first so the base figure exists before transplanting
+        ordered = sorted(
+            enumerate(self.distributions.items()),
+            key=lambda x: -x[1][1]['points'].shape[1]
+        )
+
+        for orig_i, (name, dist) in ordered:
+            color = dist['color'] or self.fallback_colors[orig_i % len(self.fallback_colors)]
+            k = dist['points'].shape[1]
+
+            if k == max_params:
+                opts = {
+                    'labels': full_labels, 'color': color, 'levels': levels,
+                    'fill_contours': True, 'plot_datapoints': False, 'fig': fig
+                }
+                opts.update(kwargs)
+                fig = corner.corner(dist['points'], weights=dist['weights'], **opts)
+            else:
+                # Render on a temporary figure then transplant artists into the
+                # top-left k×k sub-panels of the main figure — no fake data added
+                extra_kw = {kk: vv for kk, vv in kwargs.items()
+                            if kk not in ('fig', 'labels', 'color', 'levels',
+                                          'fill_contours', 'plot_datapoints')}
+                temp_fig = corner.corner(
+                    dist['points'], weights=dist['weights'],
+                    labels=full_labels[:k], color=color, levels=levels,
+                    fill_contours=True, plot_datapoints=False, **extra_kw
+                )
+                temp_axarr = np.array(temp_fig.axes).reshape(k, k)
+                main_axarr = np.array(fig.axes).reshape(max_params, max_params)
+                for row in range(k):
+                    for col in range(k):
+                        src = temp_axarr[row, col]
+                        dst = main_axarr[row, col]
+                        for coll in list(src.collections):
+                            coll.remove()
+                            dst.add_collection(coll)
+                            coll.set_transform(dst.transData)
+                        # Only the diagonal (1-D histogram) lines are moved.
+                        if row == col:
+                            for ln in list(src.lines):
+                                ln.remove()
+                                dst.add_line(ln)
+                plt.close(temp_fig)
+
+            handle_map[orig_i] = mlines.Line2D([], [], color=color, label=name, lw=2)
+
+        # `is not None` + len() instead of truthiness: true_values may be a
+        # NumPy array, whose truth value is ambiguous.
+        if self.true_values is not None and len(self.true_values) > 0:
+            truths = list(self.true_values[:max_params])
+            # overplot_lines needs one entry per parameter; None skips a line.
+            truths += [None] * (max_params - len(truths))
+            corner.overplot_lines(fig, truths, color="gray", ls="--", alpha=0.5)
+
+        handles = [handle_map[i] for i in sorted(handle_map)]
+        fig.legend(handles=handles, loc='upper right', bbox_to_anchor=(0.95, 0.95))
+        return fig
+
+    def _plot_getdist_backend(self, levels=None, **kwargs):
+        """Draw all distributions with getdist's triangle plot and return the plotter.
+
+        Each distribution is wrapped in an ``MCSamples`` whose parameters are
+        named ``p0, p1, ...`` so that overlays of different dimensionality
+        share parameter names.
+
+        Raises:
+            ImportError: If the ``getdist`` package is not installed.
+        """
+        if plots is None: raise ImportError("Please install 'getdist'.")
+        if levels is None:
+            levels = [0.68, 0.95]
+        samples_list = []
+
+        max_params = max(d['points'].shape[1] for d in self.distributions.values())
+        full_labels = self._get_default_param_labels(max_params)
+
+        colors = []
+        for i, (name, dist) in enumerate(self.distributions.items()):
+            k = dist['points'].shape[1]
+            p_names = [f"p{j}" for j in range(k)]
+            s = MCSamples(samples=dist['points'], weights=dist['weights'],
+                          names=p_names, labels=full_labels[:k], label=name)
+            samples_list.append(s)
+            colors.append(dist['color'] or self.fallback_colors[i % len(self.fallback_colors)])
+
+        g = plots.get_subplot_plotter()
+        # NOTE(review): confirm against the getdist docs that `contour_levels`
+        # accepts probability fractions (it may expect density levels) and
+        # that `colors` is the keyword honoured by triangle_plot.
+        g.triangle_plot(samples_list, filled=True, contour_levels=levels,
+                        colors=colors, markers=self.true_values, **kwargs)
+        return g
+
+    def plot_forest(self):
+        """Forest plot: one panel per parameter, one row per distribution.
+
+        Each row shows the weighted median with the 16-84% interval (thick)
+        and the 2.5-97.5% interval (thin, faint); a dashed vertical line marks
+        the true value where one is available. Rows follow insertion order
+        from top to bottom.
+
+        Returns:
+            matplotlib.figure.Figure: The created figure.
+
+        Raises:
+            ValueError: If no distribution was added.
+        """
+        if not self.distributions:
+            raise ValueError("No distributions added.")
+        num_dist = len(self.distributions)
+        max_params = max(d['points'].shape[1] for d in self.distributions.values())
+        labels = self._get_default_param_labels(max_params)
+
+        fig, axes = plt.subplots(1, max_params, figsize=(max_params*4, num_dist * 0.4 + 2), sharey=True)
+        if max_params == 1: axes = [axes]
+
+        for p in range(max_params):
+            ax = axes[p]
+            for i, (name, dist) in enumerate(self.distributions.items()):
+                if p >= dist['points'].shape[1]: continue
+
+                ci = dist['stats']['cis'][p]
+                color = dist['color'] or self.fallback_colors[i % len(self.fallback_colors)]
+                ax.errorbar(ci['median'], i, xerr=[[ci['median'] - ci['lo2']], [ci['hi2'] - ci['median']]],
+                            fmt='none', color=color, lw=1, alpha=0.3)
+                ax.errorbar(ci['median'], i, xerr=[[ci['median'] - ci['lo1']], [ci['hi1'] - ci['median']]],
+                            fmt='o', color=color, lw=3)
+
+            # true_values may be a NumPy array, so avoid a truthiness test.
+            if self.true_values is not None and p < len(self.true_values):
+                ax.axvline(self.true_values[p], color='gray', ls='--', alpha=0.6)
+            ax.set_xlabel(f'${labels[p]}$')
+            if p == 0:
+                ax.set_yticks(range(num_dist))
+                ax.set_yticklabels(list(self.distributions.keys()))
+
+        # The panels share their y-axis, so invert it exactly once: inverting
+        # per panel toggles the shared axis and cancels out for an even
+        # number of parameters.
+        axes[0].invert_yaxis()
+        plt.tight_layout()
+        return fig
+
+
+class GriddedProbabilities(DistributionDiagnostic):
+    """Diagnoses distributions defined on a regular N-D probability grid.
+
+    The same 1-D coordinate vector ``coords_1d`` is used along every axis of
+    every grid, so each grid axis must have ``len(coords_1d)`` entries.
+    """
+    def __init__(self, coords_1d=None, **kwargs):
+        """
+        Args:
+            coords_1d: Coordinates shared by all grid axes. Defaults to 100
+                evenly spaced values on [0, 1].
+            **kwargs: Forwarded to ``DistributionDiagnostic.__init__``.
+        """
+        super().__init__(**kwargs)
+        self.coords_1d = coords_1d if coords_1d is not None else np.linspace(0, 1, 100)
+
+    def add_distribution(self, grid, label=None, color=None):
+        """Register a gridded distribution and compute its statistics.
+
+        The grid is flattened into one weighted point per cell: the cell
+        coordinates become ``'points'`` and the cell values ``'weights'``.
+
+        Args:
+            grid: N-D array-like of (unnormalised) probabilities.
+            label: Name of the distribution; generated if None.
+            color: Plot color; a fallback color is used at plot time if None.
+
+        Raises:
+            ValueError: If ``grid`` is 0-dimensional or any axis length
+                differs from ``len(coords_1d)``.
+        """
+        label = self._get_distribution_label(label)
+        grid = np.asarray(grid)
+        n_coords = len(self.coords_1d)
+        # Catch a mismatch here; otherwise it only surfaces later as an
+        # unrelated length error between points and weights.
+        if grid.ndim == 0 or any(size != n_coords for size in grid.shape):
+            raise ValueError(
+                "grid must have at least one axis and %d entries along every axis "
+                "to match coords_1d; got shape %s." % (n_coords, grid.shape)
+            )
+
+        axes_coords = [self.coords_1d] * grid.ndim
+        # 'ij' indexing keeps the mesh in the same C order as grid.flatten().
+        mesh = np.meshgrid(*axes_coords, indexing='ij')
+        points = np.vstack([m.flatten() for m in mesh]).T
+        weights = grid.flatten()
+
+        self.distributions[label] = {
+            'grid': grid, 'points': points, 'weights': weights,
+            'color': color,
+            'stats': self._calculate_base_metrics(points, weights)
+        }
+
+
+class SampledDistribution(DistributionDiagnostic):
+    """Diagnoses distributions represented as samples (e.g. MCMC chains, Monte Carlo draws)."""
+    def add_distribution(self, samples, label=None, weights=None, color=None):
+        """Register a sampled distribution and compute its statistics.
+
+        Args:
+            samples: Array-like of shape (n_samples, n_params). A 1-D input is
+                reshaped to a single-parameter column.
+            label: Name of the distribution; generated if None.
+            weights: Optional importance weight per sample; defaults to ones.
+            color: Plot color; a fallback color is used at plot time if None.
+
+        Raises:
+            ValueError: If ``samples`` is not 1-D or 2-D, or ``weights`` does
+                not have exactly one entry per sample.
+        """
+        label = self._get_distribution_label(label)
+        # Accept lists / nested sequences as well as arrays.
+        samples = np.asarray(samples)
+        if samples.ndim == 1:
+            samples = samples[:, np.newaxis]
+        if samples.ndim != 2:
+            raise ValueError(
+                "samples must have shape (n_samples, n_params); got %s." % (samples.shape,)
+            )
+
+        if weights is None:
+            weights = np.ones(len(samples))
+        else:
+            weights = np.asarray(weights, dtype=float)
+            if weights.shape != (len(samples),):
+                raise ValueError(
+                    "weights must have one entry per sample (%d); got shape %s."
+                    % (len(samples), weights.shape)
+                )
+
+        self.distributions[label] = {
+            'points': samples, 'weights': weights,
+            'color': color,
+            'stats': self._calculate_base_metrics(samples, weights)
+        }
+