workshop-blackjax-nested-sampling/workshop_nested_sampling.py at master · handley-lab/workshop-blackjax-nested-sampling · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
# | # BlackJAX Nested Sampling Workshop
# |
# | This workshop demonstrates GPU-native nested sampling using BlackJAX. We'll progress through three examples: line fitting, 2D Gaussian inference, and performance comparisons with other samplers. The workshop showcases JAX's key strengths: automatic differentiation and JIT compilation for high-performance Bayesian inference.
# ------------------------------------------------
# | ## Installation for Google Colab
# |```bash
# | pip install git+https://github.com/handley-lab/blackjax@nested_sampling
# | pip install anesthetic tqdm
# |```

import jax
# | Configure JAX immediately after import
jax.config.update("jax_enable_x64", True)

import jax.numpy as jnp
import numpy as np
import matplotlib.pyplot as plt
import tqdm
import time
import blackjax
from anesthetic import NestedSamples

# | ## Part 1: Line Fitting with Nested Sampling
# |
# | We start with the classic problem of fitting a linear model y = mx + c to noisy data.
# | This introduces the basic nested sampling workflow in BlackJAX.


# | ### 1.1 Nested Sampling Configuration
# |
# | Key parameters for workshop timing and educational value:
# | - `num_live=100`: Fast convergence for workshop setting
# | - `num_delete=50`: Parallelization parameter
# | - `num_inner_steps`: Reliability parameter (rule of thumb: 5 * num_dims)
# Root PRNG key for the script; random draws below use keys split off from it.
rng_key = jax.random.PRNGKey(42)
# num_live and num_delete are shared by every nested sampling run in this file.
num_live = 100
num_delete = 50

# | ### 1.2 Generate Synthetic Line Data
# |
# | True model: y = 2x + 1 + noise, with σ = 0.5
num_data = 15
x = jnp.linspace(-2.0, 2.0, num_data)
# Ground-truth values; this dict is extended with the 2D Gaussian parameters in Part 2.
true = {'m': 2.0, 'c': 1.0, 'sigma': 0.5}

# Use one half of the split for the noise draw and carry the other half forward.
key, rng_key = jax.random.split(rng_key)
noise = true['sigma'] * jax.random.normal(key, (num_data,))
y = true['m'] * x + true['c'] + noise

# | Visualize the data
fig, ax = plt.subplots(figsize=(8, 5))
# Error bars use the true noise level, not an estimate from the data.
ax.errorbar(x, y, yerr=true['sigma'], fmt="o", label="Observed data", color='black')
ax.plot(x, true['m'] * x + true['c'], '--', label="True model", color='red', alpha=0.7)
ax.set_xlabel("x")
ax.set_ylabel("y")
ax.legend()
ax.set_title("Linear Model: Bayesian Parameter Estimation")

# | ### 1.3 Define Likelihood Function
# |
# | Gaussian likelihood with unknown slope, intercept, and noise level
def line_loglikelihood(params):
    """Return the Gaussian log-likelihood of the line data for one parameter set.

    Parameters
    ----------
    params : dict
        Mapping with entries ``"m"`` (slope), ``"c"`` (intercept) and
        ``"sigma"`` (noise level).

    Returns
    -------
    Log-density of the module-level observations ``y`` given the model
    ``m * x + c`` evaluated on the module-level grid ``x``.
    """
    prediction = params["m"] * x + params["c"]
    variance = params["sigma"] ** 2
    # The variance is handed over as the covariance argument of the density.
    return jax.scipy.stats.multivariate_normal.logpdf(y, prediction, variance)

# | ### 1.4 Define Prior Distributions
# (lower, upper) bounds per parameter, passed to uniform_prior below.
prior_bounds = {
    "m": (-5.0, 5.0),      # slope
    "c": (-5.0, 5.0),      # intercept
    "sigma": (0.1, 2.0),   # noise level (positive)
}

# Rule of thumb from section 1.1: 5 inner steps per dimension.
num_dims = len(prior_bounds)
num_inner_steps = num_dims * 5

# | ### 1.5 Initialize Nested Sampler
rng_key, prior_key = jax.random.split(rng_key)
# Returns the initial particles together with the matching log-prior function.
particles, logprior_fn = blackjax.ns.utils.uniform_prior(prior_key, num_live, prior_bounds)

nested_sampler = blackjax.nss(
    logprior_fn=logprior_fn,
    loglikelihood_fn=line_loglikelihood,
    num_delete=num_delete,
    num_inner_steps=num_inner_steps,
)
print(f"Initialized nested sampler with {num_live} live points")

# | ### 1.6 JIT Compile for Performance
# NOTE: jax.jit only wraps the functions here; tracing and compilation are
# deferred until each wrapped function is first called.
init_fn = jax.jit(nested_sampler.init)
step_fn = jax.jit(nested_sampler.step)
print("Functions compiled - ready to run!")

# | ### 1.7 Run the Nested Sampling
print("Running nested sampling for line fitting...")
# The timed region starts before the first calls to the jitted functions, so
# ns_time also includes their one-off compilation.
ns_start = time.time()
live = init_fn(particles)
dead = []  # one entry per step, merged by finalise() after the loop

with tqdm.tqdm(desc="Dead points", unit=" dead points") as pbar:
    # Keep stepping until logZ_live - logZ falls below -3. The "not ... <" form
    # also keeps looping if the difference is NaN.
    while not live.logZ_live - live.logZ < -3:  # Convergence criterion
        rng_key, subkey = jax.random.split(rng_key, 2)
        live, dead_info = step_fn(subkey, live)
        dead.append(dead_info)
        pbar.update(num_delete)

# Combine the final live state with the per-step records into a single run.
dead = blackjax.ns.utils.finalise(live, dead)
ns_time = time.time() - ns_start

# | ### 1.8 Process Results with Anesthetic
columns = ["m", "c", "sigma"]
labels = [r"$m$", r"$c$", r"$\sigma$"]
# Stack the per-parameter arrays and transpose so each row is one sample.
data = jnp.vstack([dead.particles[key] for key in columns]).T

line_samples = NestedSamples(
    data,
    logL=dead.loglikelihood,
    logL_birth=dead.loglikelihood_birth,
    columns=columns,
    labels=labels,
    logzero=jnp.nan,
)

# | ### 1.9 Results Analysis and Visualization
print(f"Nested sampling runtime: {ns_time:.2f} seconds")
# The quoted uncertainty is the spread of logZ(100); presumably 100 resampled
# evidence estimates -- confirm against the anesthetic documentation.
print(f"Log Evidence: {line_samples.logZ():.2f} ± {line_samples.logZ(100).std():.2f}")
print(f"True parameters: m={true['m']}, c={true['c']}, σ={true['sigma']}")
print(f"Posterior means: m={line_samples.m.mean():.2f}, c={line_samples.c.mean():.2f}, σ={line_samples.sigma.mean():.2f}")

# Create posterior corner plot with true values marked
kinds = {'lower': 'kde_2d', 'diagonal': 'hist_1d', 'upper': 'scatter_2d'}
axes = line_samples.plot_2d(columns, kinds=kinds, label='Posterior')
# At this point `true` holds exactly the keys m, c and sigma.
axes.axlines(true, color='red', linestyle='--', alpha=0.8)
plt.suptitle("Line Fitting: Posterior Distributions")

# | ## Part 2: 2D Gaussian Parameter Inference
# |
# | Now we tackle a more complex problem: inferring the parameters of a 2D Gaussian distribution
# | including the correlation coefficient. This demonstrates parameter transforms and constrained sampling.


# | ### 2.1 Define 2D Gaussian Parameters
# Extend the ground-truth dict from Part 1 with the 2D Gaussian parameters.
true.update({
    'mu1': 1.0, 'mu2': -0.5,
    'sigma1': 1.2, 'sigma2': 0.8,
    'rho': 0.6
})
# Only the newly added keys are printed; m, c and sigma from Part 1 are filtered out.
print("True parameters:", {k: v for k, v in true.items() if k in ['mu1', 'mu2', 'sigma1', 'sigma2', 'rho']})

# | ### 2.2 Generate Correlated 2D Data
true_mu = jnp.array([true['mu1'], true['mu2']])
# Covariance built from the two standard deviations and the correlation rho.
true_cov = jnp.array([
    [true['sigma1']**2, true['rho'] * true['sigma1'] * true['sigma2']],
    [true['rho'] * true['sigma1'] * true['sigma2'], true['sigma2']**2]
])

num_samples = 200
key, rng_key = jax.random.split(rng_key)
gaussian_data = jax.random.multivariate_normal(key, true_mu, true_cov, (num_samples,))

print(f"Generated {num_samples} correlated 2D samples")
print(f"Sample mean: [{gaussian_data.mean(0)[0]:.2f}, {gaussian_data.mean(0)[1]:.2f}]")

# | ### 2.3 Visualize the 2D Data
fig, ax = plt.subplots(figsize=(8, 6))
ax.scatter(gaussian_data[:, 0], gaussian_data[:, 1], alpha=0.6, s=20)
ax.set_xlabel(r"$x_1$")
ax.set_ylabel(r"$x_2$")
ax.set_title("2D Gaussian Data")
ax.grid(True, alpha=0.3)

# | ### 2.4 Define Likelihood with Parameter Transforms
# |
# | We use arctanh/tanh transform for the correlation coefficient to enforce |ρ| < 1
def gaussian_2d_loglikelihood(params):
    """Return the summed log-likelihood of ``gaussian_data`` for a correlated 2D Gaussian.

    Parameters
    ----------
    params : dict
        Mapping with entries ``"mu1"``, ``"mu2"``, ``"sigma1"``, ``"sigma2"``
        and ``"rho_t"``; the correlation used is ``tanh(rho_t)``.

    Returns
    -------
    Sum of the per-point log-densities, or ``-inf`` when the determinant of
    the covariance matrix is not above ``1e-8``.
    """
    sigma1 = params["sigma1"]
    sigma2 = params["sigma2"]

    # tanh maps the sampled parameter onto a correlation strictly inside (-1, 1).
    rho = jnp.tanh(params["rho_t"])

    off_diagonal = rho * sigma1 * sigma2
    cov = jnp.array([
        [sigma1**2, off_diagonal],
        [off_diagonal, sigma2**2],
    ])

    def loglik_when_valid():
        mean = jnp.array([params["mu1"], params["mu2"]])
        per_point = jax.scipy.stats.multivariate_normal.logpdf(gaussian_data, mean, cov)
        return jnp.sum(per_point)

    def loglik_when_invalid():
        return -jnp.inf

    # Guard against a (numerically) degenerate covariance matrix.
    determinant_ok = jnp.linalg.det(cov) > 1e-8
    return jax.lax.cond(determinant_ok, loglik_when_valid, loglik_when_invalid)

# | ### 2.5 Set Up Priors for 2D Gaussian
# (lower, upper) bounds per parameter, passed to uniform_prior below.
gaussian_prior_bounds = {
    "mu1": (-3.0, 5.0),
    "mu2": (-3.0, 3.0),
    "sigma1": (0.1, 3.0),
    "sigma2": (0.1, 3.0),
    "rho_t": (-2.0, 2.0),  # transformed correlation: rho = tanh(rho_t)
}

# Same rule of thumb as in Part 1: 5 inner steps per dimension.
num_dims_2d = len(gaussian_prior_bounds)
num_inner_steps_2d = num_dims_2d * 5

# | ### 2.6 Initialize and Run Nested Sampling
rng_key, prior_key = jax.random.split(rng_key)
particles_2d, logprior_fn_2d = blackjax.ns.utils.uniform_prior(prior_key, num_live, gaussian_prior_bounds)

nested_sampler_2d = blackjax.nss(
    logprior_fn=logprior_fn_2d,
    loglikelihood_fn=gaussian_2d_loglikelihood,
    num_delete=num_delete,
    num_inner_steps=num_inner_steps_2d,
)

init_fn_2d = jax.jit(nested_sampler_2d.init)
step_fn_2d = jax.jit(nested_sampler_2d.step)

print("Running nested sampling for 2D Gaussian...")
live_2d = init_fn_2d(particles_2d)
dead_2d = []  # one entry per step, merged by finalise() after the loop

with tqdm.tqdm(desc="Dead points", unit=" dead points") as pbar:
    # Same stopping rule as Part 1: stop once logZ_live - logZ is below -3.
    while not live_2d.logZ_live - live_2d.logZ < -3:
        rng_key, subkey = jax.random.split(rng_key, 2)
        live_2d, dead_info_2d = step_fn_2d(subkey, live_2d)
        dead_2d.append(dead_info_2d)
        pbar.update(num_delete)

dead_2d = blackjax.ns.utils.finalise(live_2d, dead_2d)

# | ### 2.7 Transform Back and Analyze Results
columns_2d = ["mu1", "mu2", "sigma1", "sigma2", "rho_t"]
labels_2d = [r"$\mu_1$", r"$\mu_2$", r"$\sigma_1$", r"$\sigma_2$", r"$\rho_t$"]
# Stack the per-parameter arrays and transpose so each row is one sample.
data_2d = jnp.vstack([dead_2d.particles[key] for key in columns_2d]).T

gaussian_samples = NestedSamples(
    data_2d,
    logL=dead_2d.loglikelihood,
    logL_birth=dead_2d.loglikelihood_birth,
    columns=columns_2d,
    labels=labels_2d,
    logzero=jnp.nan,
)

# | Add transformed correlation coefficient
# Undo the sampling transform so results are reported in terms of rho itself.
gaussian_samples["rho"] = jnp.tanh(gaussian_samples["rho_t"].values)

print(f"Log Evidence: {gaussian_samples.logZ():.2f} ± {gaussian_samples.logZ(100).std():.2f}")
print(f"True parameters: μ₁={true['mu1']:.2f}, μ₂={true['mu2']:.2f}, σ₁={true['sigma1']:.2f}, σ₂={true['sigma2']:.2f}, ρ={true['rho']:.2f}")
print(f"Posterior means: μ₁={gaussian_samples.mu1.mean():.2f}, μ₂={gaussian_samples.mu2.mean():.2f}, σ₁={gaussian_samples.sigma1.mean():.2f}, σ₂={gaussian_samples.sigma2.mean():.2f}, ρ={gaussian_samples.rho.mean():.2f}")

# | Plot posterior for key parameters with true values
# The plot uses the derived rho column rather than the sampled rho_t.
key_params = ["mu1", "mu2", "sigma1", "sigma2", "rho"]
axes = gaussian_samples[key_params].plot_2d(key_params, kinds={'diagonal': 'hist_1d', 'lower': 'kde_2d'})

# Mark true values using anesthetic's axlines method
# Restrict `true` to the plotted keys; it also still holds m, c and sigma from Part 1.
true_2d = {k: true[k] for k in key_params}
axes.axlines(true_2d, color='red', linestyle='--', alpha=0.8)
plt.suptitle("2D Gaussian: Posterior Parameter Estimates")

# | ## Part 3: Performance Comparison
# |
# | Compare BlackJAX nested sampling with NUTS (No-U-Turn Sampler)
# | on the line fitting problem

import time

# | ### 3.1 Define NUTS Log-Probability Function
def nuts_logprob(params_array):
    """Return the log-density targeted by NUTS for the line-fitting problem.

    Parameters
    ----------
    params_array : array-like of length 3
        ``[m, c, log_sigma]``; the noise level is sampled in log space.

    Returns
    -------
    The line log-likelihood plus ``log_sigma`` (the Jacobian term of the log
    transform) when ``m``, ``c`` and ``sigma`` lie inside the flat-prior
    bounds, and ``-inf`` otherwise.
    """
    m, c, log_sigma = params_array
    sigma = jnp.exp(log_sigma)  # exponentiating keeps sigma positive

    # Flat-prior support: the same bounds as the nested sampling prior.
    inside_m = (m >= -5.0) & (m <= 5.0)
    inside_c = (c >= -5.0) & (c <= 5.0)
    inside_sigma = (sigma >= 0.1) & (sigma <= 2.0)
    inside_prior = inside_m & inside_c & inside_sigma

    def logprob_inside():
        prediction = m * x + c
        variance = sigma**2
        loglik = jax.scipy.stats.multivariate_normal.logpdf(y, prediction, variance)
        # Jacobian of sigma = exp(log_sigma) contributes log_sigma.
        return loglik + log_sigma

    def logprob_outside():
        return -jnp.inf

    return jax.lax.cond(inside_prior, logprob_inside, logprob_outside)

# | ### 3.2 Initialize and Run NUTS Sampler
initial_position = jnp.array([1.0, 0.0, jnp.log(1.0)])  # [m, c, log_sigma]
# Fixed step size and identity inverse mass matrix; no adaptation is run here.
nuts = blackjax.nuts(nuts_logprob, step_size=0.1, inverse_mass_matrix=jnp.eye(3))

rng_key, nuts_key = jax.random.split(rng_key)
nuts_state = nuts.init(initial_position)
nuts_step = jax.jit(nuts.step)

print("Running NUTS sampler...")

num_nuts_samples = 2000
# The timed region includes the first call to the jitted step function.
nuts_start = time.time()
nuts_samples = []
nuts_states = nuts_state  # current chain state, overwritten on every iteration

for i in tqdm.tqdm(range(num_nuts_samples), desc="NUTS"):
    # Fresh key per transition; nuts_key carries the remaining randomness forward.
    nuts_key, step_key = jax.random.split(nuts_key)
    nuts_states, nuts_info = nuts_step(step_key, nuts_states)
    nuts_samples.append(nuts_states.position)

nuts_time = time.time() - nuts_start
# Stack the per-step positions into one array with a row per draw.
nuts_samples = jnp.stack(nuts_samples)

# | ### 3.3 Process NUTS Results
# Number of leading NUTS draws discarded as burn-in. Defined once here so the
# printed means and the histograms below are guaranteed to use the same cut.
nuts_burnin = 500

# Columns of nuts_samples follow the [m, c, log_sigma] layout of the chain.
nuts_m = nuts_samples[:, 0]
nuts_c = nuts_samples[:, 1]
nuts_sigma = jnp.exp(nuts_samples[:, 2])  # back-transform from log space

print(f"NUTS runtime: {nuts_time:.2f} seconds")
print(f"NUTS means: m={nuts_m[nuts_burnin:].mean():.2f}, c={nuts_c[nuts_burnin:].mean():.2f}, σ={nuts_sigma[nuts_burnin:].mean():.2f}")

# | ### 3.4 Performance Summary and Visualization
methods = ["Nested Sampling", "NUTS"]
times = [f"{ns_time:.1f} sec", f"{nuts_time:.1f} sec"]
evidence = ["✓ (Log Z available)", "✗ (Not computed)"]
parallelization = ["✓ (GPU native)", "Limited"]

# The evidence column is 22 wide because its longest entry is 19 characters;
# a narrower field pushes the last column out of alignment.
print(f"{'Method':<20} {'Time':<15} {'Evidence':<22} {'GPU Parallel'}")
print("-" * 72)
for method, runtime, has_evidence, parallel in zip(methods, times, evidence, parallelization):
    print(f"{method:<20} {runtime:<15} {has_evidence:<22} {parallel}")

# | ### 3.5 Posterior Comparison Plot
fig, axes = plt.subplots(1, 3, figsize=(15, 4))

# | Generate proper posterior samples from NestedSamples (not raw dead points)
# | Use the number of available samples or 1000, whichever is smaller
n_posterior_samples = min(1000, len(line_samples))
ns_posterior_samples = line_samples.sample(n_posterior_samples, replace=True)  # Sample from posterior with replacement

# | Compare marginal posteriors
# One panel per parameter: (column name, NUTS chain, x-axis label).
comparison_panels = [
    ("m", nuts_m, 'Slope (m)'),
    ("c", nuts_c, 'Intercept (c)'),
    ("sigma", nuts_sigma, 'Noise (σ)'),
]
for panel_ax, (name, nuts_chain, xlabel) in zip(axes, comparison_panels):
    panel_ax.hist(ns_posterior_samples[name].values, bins=30, alpha=0.7, density=True, label='Nested Sampling')
    panel_ax.hist(nuts_chain[nuts_burnin:], bins=30, alpha=0.7, density=True, label='NUTS')
    panel_ax.axvline(true[name], color='red', linestyle='--', label='True value')
    panel_ax.set_xlabel(xlabel)
    panel_ax.set_ylabel('Density')
    panel_ax.legend()

plt.tight_layout()
plt.suptitle("Posterior Comparison: Nested Sampling vs NUTS", y=1.02)

# | ## Part 4: Building Your Own Nested Sampler
# |
# | Advanced users can build custom nested samplers using BlackJAX's low-level components.
# | This demonstrates the modular design and shows how nested sampling works under the hood.

from functools import partial
from blackjax.ns.adaptive import build_kernel, init
from blackjax.ns.base import new_state_and_info, delete_fn
from blackjax.ns.utils import repeat_kernel, finalise
from blackjax.mcmc.random_walk import build_rmh, RWState
from blackjax import SamplingAlgorithm

# | ### 4.1 Define Custom Nested MCMC Algorithm
def custom_nsmcmc(
    logprior_fn,
    loglikelihood_fn,
    num_delete=10,
    num_inner_steps=10,
):
    """
    Build a custom nested sampling MCMC algorithm from low-level components.

    This demonstrates how to construct a nested sampler using BlackJAX's
    modular infrastructure - useful for research and customization.

    Parameters
    ----------
    logprior_fn : callable
        Function that computes the log prior probability of the parameters.
    loglikelihood_fn : callable
        Function that computes the log likelihood of the parameters.
    num_delete : int
        Number of particles to delete at each step.
    num_inner_steps : int
        Number of inner MCMC steps to perform.

    Returns
    -------
    SamplingAlgorithm
        Custom nested sampling algorithm with init and step functions.
    """

    # Build the MCMC kernel for exploring within likelihood constraints
    mcmc_kernel = build_rmh()

    @repeat_kernel(num_inner_steps)
    def inner_kernel(rng_key, state, logprior_fn, loglikelihood_fn, loglikelihood_0, params):
        """Inner MCMC kernel that explores within likelihood constraint."""
        def proposal_distribution(rng_key, position):
            """Propose ``position`` plus Gaussian noise scaled by ``params['sigma']``."""
            sigma = params['sigma']
            if isinstance(position, dict):
                # Derive one independent subkey per parameter up front. A PRNG
                # key must not be both consumed by a sampler and split
                # afterwards, so rng_key itself is never passed to normal().
                subkeys = jax.random.split(rng_key, len(position))
                proposal = {}
                for index, name in enumerate(position):
                    scale = sigma[name] if isinstance(sigma, dict) else sigma
                    noise = jax.random.normal(subkeys[index], shape=position[name].shape)
                    proposal[name] = position[name] + scale * noise
                return proposal
            # Fallback for array position
            step = sigma * jax.random.normal(rng_key, shape=position.shape)
            return position + step

        # Convert to MCMC state format; the random walk targets the prior only.
        mcmc_state = RWState(position=state.position, logdensity=state.logprior)
        new_mcmc_state, mcmc_info = mcmc_kernel(rng_key, mcmc_state, logprior_fn, proposal_distribution)

        # Evaluate likelihood at new position
        loglikelihood = loglikelihood_fn(new_mcmc_state.position)

        # Create new nested sampling state
        new_state, info = new_state_and_info(
            position=new_mcmc_state.position,
            logprior=new_mcmc_state.logdensity,
            loglikelihood=loglikelihood,
            info=mcmc_info,
        )

        # Accept only if likelihood exceeds threshold (key constraint!);
        # otherwise the incoming state is returned unchanged.
        new_state = jax.lax.cond(
            loglikelihood > loglikelihood_0,
            lambda _: new_state,
            lambda _: state,
            operand=None,
        )

        return new_state, info

    def update_inner_kernel_params_fn(state, info, params):
        """Adapt step size based on current particle distribution."""
        # One proposal scale per parameter: the standard deviation of the
        # current particles for that parameter.
        return {'sigma': {name: jnp.std(values) for name, values in state.particles.items()}}

    # Build the full nested sampling kernel
    _delete_fn = partial(delete_fn, num_delete=num_delete)

    step_fn = build_kernel(
        logprior_fn,
        loglikelihood_fn,
        _delete_fn,
        inner_kernel,
        update_inner_kernel_params_fn,
    )

    init_fn = partial(
        init,
        logprior_fn=logprior_fn,
        loglikelihood_fn=loglikelihood_fn,
        update_inner_kernel_params_fn=update_inner_kernel_params_fn,
    )

    return SamplingAlgorithm(init_fn, step_fn)

# | ### 4.2 Test Custom Sampler on Simple 2D Problem
print("Testing custom nested sampler on 2D Gaussian problem...")

# Simple 2D Gaussian test case
custom_true = {'mu1': 0.5, 'mu2': -0.2, 'sigma1': 0.8, 'sigma2': 0.6}

# Generate test data
custom_mu = jnp.array([custom_true['mu1'], custom_true['mu2']])
# Diagonal covariance: the two components are drawn without correlation.
custom_cov = jnp.diag(jnp.array([custom_true['sigma1']**2, custom_true['sigma2']**2]))

num_test_samples = 50
# Use one half of the split for the data draw and carry the other half forward.
key, rng_key = jax.random.split(rng_key)
custom_data = jax.random.multivariate_normal(key, custom_mu, custom_cov, (num_test_samples,))

# Define likelihood and prior for custom sampler
def custom_loglikelihood(params):
    """Return the summed log-likelihood of ``custom_data`` for an uncorrelated 2D Gaussian.

    Parameters
    ----------
    params : dict
        Mapping with entries ``"mu1"``, ``"mu2"``, ``"sigma1"`` and ``"sigma2"``.

    Returns
    -------
    Sum of the per-point log-densities under a Gaussian with diagonal covariance.
    """
    mean = jnp.array([params["mu1"], params["mu2"]])
    variances = jnp.array([params["sigma1"]**2, params["sigma2"]**2])
    per_point = jax.scipy.stats.multivariate_normal.logpdf(custom_data, mean, jnp.diag(variances))
    return jnp.sum(per_point)

# (lower, upper) bounds per parameter, passed to uniform_prior below.
custom_prior_bounds = {
    "mu1": (-2.0, 3.0),
    "mu2": (-2.0, 2.0),
    "sigma1": (0.1, 2.0),
    "sigma2": (0.1, 2.0),
}

# | ### 4.3 Initialize and Run Custom Nested Sampler
rng_key, prior_key = jax.random.split(rng_key)
particles_custom, logprior_fn_custom = blackjax.ns.utils.uniform_prior(prior_key, num_live, custom_prior_bounds)

# Build custom sampler
custom_nested_sampler = custom_nsmcmc(
    logprior_fn=logprior_fn_custom,
    loglikelihood_fn=custom_loglikelihood,
    num_delete=num_delete,
    num_inner_steps=15,  # Slightly more steps for stability
)

# JIT compile custom sampler functions
custom_init_fn = jax.jit(custom_nested_sampler.init)
custom_step_fn = jax.jit(custom_nested_sampler.step)

print("Running custom nested sampler...")
# The timed region includes the first calls to the jitted functions.
custom_start = time.time()
custom_live = custom_init_fn(particles_custom)
custom_dead = []  # one entry per step, merged by finalise() after the loop

with tqdm.tqdm(desc="Dead points (custom)", unit=" dead points") as pbar:
    # Same stopping rule as the earlier runs: stop once logZ_live - logZ is below -3.
    while not custom_live.logZ_live - custom_live.logZ < -3:
        rng_key, subkey = jax.random.split(rng_key, 2)
        custom_live, custom_dead_info = custom_step_fn(subkey, custom_live)
        custom_dead.append(custom_dead_info)
        pbar.update(num_delete)

custom_dead = finalise(custom_live, custom_dead)
custom_time = time.time() - custom_start

# | ### 4.4 Process Custom Sampler Results
columns_custom = ["mu1", "mu2", "sigma1", "sigma2"]
labels_custom = [r"$\mu_1$", r"$\mu_2$", r"$\sigma_1$", r"$\sigma_2$"]
# Stack the per-parameter arrays and transpose so each row is one sample.
data_custom = jnp.vstack([custom_dead.particles[key] for key in columns_custom]).T

custom_samples = NestedSamples(
    data_custom,
    logL=custom_dead.loglikelihood,
    logL_birth=custom_dead.loglikelihood_birth,
    columns=columns_custom,
    labels=labels_custom,
    logzero=jnp.nan,
)

print(f"Custom sampler runtime: {custom_time:.2f} seconds")
print(f"Log Evidence: {custom_samples.logZ():.2f} ± {custom_samples.logZ(100).std():.2f}")
print(f"True parameters: μ₁={custom_true['mu1']:.2f}, μ₂={custom_true['mu2']:.2f}, σ₁={custom_true['sigma1']:.2f}, σ₂={custom_true['sigma2']:.2f}")
print(f"Posterior means: μ₁={custom_samples.mu1.mean():.2f}, μ₂={custom_samples.mu2.mean():.2f}, σ₁={custom_samples.sigma1.mean():.2f}, σ₂={custom_samples.sigma2.mean():.2f}")

# | Compare with high-level BlackJAX implementation
print("\nComparing custom implementation with high-level BlackJAX...")
# Reference run: same prior, likelihood, num_delete and num_inner_steps as the
# custom sampler, but built with the high-level blackjax.nss constructor.
standard_nested_sampler = blackjax.nss(
    logprior_fn=logprior_fn_custom,
    loglikelihood_fn=custom_loglikelihood,
    num_delete=num_delete,
    num_inner_steps=15,
)

standard_init_fn = jax.jit(standard_nested_sampler.init)
standard_step_fn = jax.jit(standard_nested_sampler.step)

rng_key, prior_key = jax.random.split(rng_key)
# Fresh initial particles; the returned log-prior function is discarded
# because logprior_fn_custom is reused above.
particles_standard, _ = blackjax.ns.utils.uniform_prior(prior_key, num_live, custom_prior_bounds)

# The timed region includes the first calls to the jitted functions.
standard_start = time.time()
standard_live = standard_init_fn(particles_standard)
standard_dead = []  # one entry per step, merged by finalise() after the loop

with tqdm.tqdm(desc="Dead points (standard)", unit=" dead points") as pbar:
    # Same stopping rule as the custom run: stop once logZ_live - logZ is below -3.
    while not standard_live.logZ_live - standard_live.logZ < -3:
        rng_key, subkey = jax.random.split(rng_key, 2)
        standard_live, standard_dead_info = standard_step_fn(subkey, standard_live)
        standard_dead.append(standard_dead_info)
        pbar.update(num_delete)

standard_dead = blackjax.ns.utils.finalise(standard_live, standard_dead)
standard_time = time.time() - standard_start

# Stack the per-parameter arrays and transpose so each row is one sample.
data_standard = jnp.vstack([standard_dead.particles[key] for key in columns_custom]).T
standard_samples = NestedSamples(
    data_standard,
    logL=standard_dead.loglikelihood,
    logL_birth=standard_dead.loglikelihood_birth,
    columns=columns_custom,
    labels=labels_custom,
    logzero=jnp.nan,
)

# | ### 4.5 Visualization and Comparison
print("\nImplementation Comparison:")
print(f"Custom sampler:   {custom_time:.2f} sec, logZ = {custom_samples.logZ():.2f}")
print(f"Standard sampler: {standard_time:.2f} sec, logZ = {standard_samples.logZ():.2f}")

# Plot comparison
fig, axes = plt.subplots(2, 2, figsize=(12, 10))
axes = axes.flatten()

# The rows of a NestedSamples object are the nested sampling points, not
# equally weighted posterior draws. Weight each histogram by the posterior
# weights so the panels show posterior marginals rather than the raw points.
custom_weights = custom_samples.get_weights()
standard_weights = standard_samples.get_weights()

for panel, param, label in zip(axes, columns_custom, labels_custom):
    panel.hist(custom_samples[param].values, weights=custom_weights, bins=25, alpha=0.7, density=True, label='Custom Implementation')
    panel.hist(standard_samples[param].values, weights=standard_weights, bins=25, alpha=0.7, density=True, label='Standard BlackJAX')
    panel.axvline(custom_true[param], color='red', linestyle='--', label='True value', alpha=0.8)
    panel.set_xlabel(label)
    panel.set_ylabel('Density')
    panel.legend()
    panel.grid(True, alpha=0.3)

plt.tight_layout()
plt.suptitle("Custom vs Standard Nested Sampling Implementation", y=1.02)

print("✓ Custom nested sampler implementation successful!")
print("This demonstrates how to build specialized samplers using BlackJAX's modular components.")

# | ## Part 5: JAX Ecosystem Integration
# |
# | Building on Viraj Pandya's JAX tutorial, we demonstrate how nested sampling integrates
# | with the broader JAX ecosystem for automatic differentiation and gradient-based inference.

import optax

# | ### 5.1 JAX-Based 2D Gaussian Inference Problem
# |
# | Following Viraj's tutorial, we'll infer parameters of a 2D Gaussian from image data,
# | then compare gradient descent, HMC, and nested sampling approaches.

print("Setting up JAX ecosystem example with 2D Gaussian image inference...")

# Define true parameters for image-based 2D Gaussian
jax_true = {
    'mu_x': 0.1, 'mu_y': -0.1,
    'sigma_x': 0.15, 'sigma_y': 0.12,
    'rho': 0.3
}

# Create coordinate grid for 2D Gaussian evaluation
grid_size = 32  # Smaller for computational efficiency
x_grid_2d = jnp.linspace(-0.8, 0.8, grid_size)
y_grid_2d = jnp.linspace(-0.8, 0.8, grid_size)
x_meshgrid, y_meshgrid = jnp.meshgrid(x_grid_2d, y_grid_2d)
# Flatten the grid into one (x, y) pair per row; the simulator evaluates the
# density on these points and reshapes the result back to x_meshgrid.shape.
xy_points_2d = jnp.stack([x_meshgrid.ravel(), y_meshgrid.ravel()], axis=-1)

# | ### 5.2 JAX Simulator Function
@jax.jit
def jax_simulator(params, rng_key):
    """
    Simulate a noisy image of a 2D Gaussian density on the module-level grid.

    Parameters
    ----------
    params : array-like of length 5
        ``[mu_x, mu_y, sigma_x, sigma_y, rho]``.
    rng_key : PRNG key
        Key used for the additive pixel noise.

    Returns
    -------
    Array shaped like ``x_meshgrid``: the Gaussian density evaluated at
    ``xy_points_2d`` plus normal noise scaled by 0.02.
    """
    mu_x, mu_y, sigma_x, sigma_y, rho = params

    # Mean vector and covariance matrix of the Gaussian.
    cross_term = rho * sigma_x * sigma_y
    mean = jnp.array([mu_x, mu_y])
    covariance = jnp.array([
        [sigma_x**2, cross_term],
        [cross_term, sigma_y**2],
    ])

    # Density on the flattened grid, folded back into image shape.
    log_density = jax.scipy.stats.multivariate_normal.logpdf(xy_points_2d, mean, covariance)
    clean_image = jnp.exp(log_density).reshape(x_meshgrid.shape)

    # Additive pixel noise drawn from the supplied key.
    pixel_noise = 0.02 * jax.random.normal(rng_key, shape=clean_image.shape)
    return clean_image + pixel_noise

# Generate synthetic observed image
key, rng_key = jax.random.split(rng_key)
# Parameter order must match the unpacking inside jax_simulator.
jax_params_true = jnp.array([jax_true['mu_x'], jax_true['mu_y'], jax_true['sigma_x'],
                            jax_true['sigma_y'], jax_true['rho']])
observed_image = jax_simulator(jax_params_true, key)

print(f"Generated {grid_size}x{grid_size} synthetic 2D Gaussian image")

# | ### 5.3 Visualize Observed Data
fig, axes = plt.subplots(1, 2, figsize=(12, 5))
axes[0].imshow(observed_image, extent=[-0.8, 0.8, -0.8, 0.8], origin='lower', cmap='viridis')
axes[0].set_title("Observed 2D Gaussian Image")
axes[0].set_xlabel("x")
axes[0].set_ylabel("y")

# Show true contours
# Recover the noise-free image: simulate with PRNGKey(0), then subtract the
# noise term regenerated from the same key and scale.
true_image = jax_simulator(jax_params_true, jax.random.PRNGKey(0)) - 0.02 * jax.random.normal(jax.random.PRNGKey(0), shape=observed_image.shape)
axes[1].contour(x_grid_2d, y_grid_2d, true_image, levels=8, colors='white', alpha=0.8)
axes[1].imshow(observed_image, extent=[-0.8, 0.8, -0.8, 0.8], origin='lower', cmap='viridis')
axes[1].set_title("Observed Image with True Contours")
axes[1].set_xlabel("x")
axes[1].set_ylabel("y")

plt.tight_layout()

# | ### 5.4 Define Loss Function for Gradient Descent
@jax.jit
def image_loss(params):
    """Return half the summed squared pixel difference to ``observed_image``.

    ``params`` is the 5-element parameter array accepted by ``jax_simulator``.
    """
    # The simulator is always called with PRNGKey(42), which makes the
    # prediction a deterministic function of params.
    # NOTE(review): the simulator still adds its noise term for that key, so
    # the prediction is not the noise-free model image -- confirm this is intended.
    simulated = jax_simulator(params, jax.random.PRNGKey(42))
    return jnp.sum((simulated - observed_image) ** 2) / 2

# | ### 5.5 Gradient Descent with Optax
print("Running gradient descent optimization...")

# Initialize parameters, ordered [mu_x, mu_y, sigma_x, sigma_y, rho]
init_params = jnp.array([0.0, 0.0, 0.2, 0.2, 0.0])

# Set up optimizer
learning_rate = 0.01
optimizer = optax.adam(learning_rate)
opt_state = optimizer.init(init_params)

# Gradient function (kept available under its original name)
grad_fn = jax.jit(jax.grad(image_loss))
# Loss and gradient from a single pass, so the loop does not evaluate the
# forward model twice per step.
loss_and_grad_fn = jax.jit(jax.value_and_grad(image_loss))

# Optimization loop
params = init_params
losses = []
param_history = []

for i in range(200):
    # loss_val is the loss at the parameters *before* this step's update.
    loss_val, grads = loss_and_grad_fn(params)

    updates, opt_state = optimizer.update(grads, opt_state)
    params = optax.apply_updates(params, updates)

    losses.append(loss_val)
    param_history.append(params)

    if i % 50 == 0:
        print(f"Step {i}: Loss = {loss_val:.4f}")

final_params_gd = params
# One row per optimization step, holding the parameters after that step.
param_history = jnp.stack(param_history)

print(f"Gradient descent final parameters: {final_params_gd}")
print(f"True parameters: {jax_params_true}")

# | ### 5.6 Nested Sampling on Image Data
print("Running nested sampling on image data...")

def image_loglikelihood(params_dict):
    """Gaussian log-likelihood of the observed image given model parameters.

    Args:
        params_dict: mapping with keys 'mu_x', 'mu_y', 'sigma_x', 'sigma_y'
            and 'rho'.

    Returns:
        Sum over all pixels of the normal log-density of `observed_image`
        around the simulated image, with standard deviation 0.02.
    """
    # The simulator takes a flat array in this fixed order
    ordered_names = ('mu_x', 'mu_y', 'sigma_x', 'sigma_y', 'rho')
    theta = jnp.array([params_dict[name] for name in ordered_names])

    # Same key on every call, so the forward model is repeatable
    model_image = jax_simulator(theta, jax.random.PRNGKey(42))

    # Pixels are treated as independent with known observation noise
    noise_std = 0.02
    pixel_logpdf = jax.scipy.stats.norm.logpdf(observed_image, model_image, noise_std)
    return jnp.sum(pixel_logpdf)

# Define priors for nested sampling: one (lower, upper) pair per parameter,
# handed to blackjax's uniform_prior helper below
image_prior_bounds = {
    'mu_x': (-0.5, 0.5),
    'mu_y': (-0.5, 0.5),
    'sigma_x': (0.05, 0.3),
    'sigma_y': (0.05, 0.3),
    'rho': (-0.8, 0.8),
}

# Initialize nested sampler
# Split off a dedicated key for the prior draw; rng_key is replaced so it can
# be split again later
rng_key, prior_key = jax.random.split(rng_key)
# num_live is defined earlier in the file
# NOTE(review): presumably the number of live points — confirm
particles_image, logprior_fn_image = blackjax.ns.utils.uniform_prior(prior_key, num_live, image_prior_bounds)

# num_delete is also defined earlier in the file; the run loop advances its
# progress bar by this amount per step
nested_sampler_image = blackjax.nss(
    logprior_fn=logprior_fn_image,
    loglikelihood_fn=image_loglikelihood,
    num_delete=num_delete,
    num_inner_steps=25,  # More steps for this complex problem
)

# JIT compile
init_fn_image = jax.jit(nested_sampler_image.init)
step_fn_image = jax.jit(nested_sampler_image.step)

# Run nested sampling (timed from initialisation through finalise)
print("Running nested sampling...")
ns_image_start = time.time()
live_image = init_fn_image(particles_image)
# Per-step dead-point info, collected for finalise() below
dead_image = []

with tqdm.tqdm(desc="Dead points (image)", unit=" dead points") as pbar:
    # `not a - b < -3` parses as `not ((a - b) < -3)`: keep stepping until
    # logZ_live - logZ drops below -3.
    # NOTE(review): presumably logZ_live is the evidence still held by the live
    # points, making this a "remaining evidence is negligible" stopping rule — confirm
    while not live_image.logZ_live - live_image.logZ < -3:
        # Fresh subkey for every step; rng_key itself is carried forward
        rng_key, subkey = jax.random.split(rng_key, 2)
        live_image, dead_info_image = step_fn_image(subkey, live_image)
        dead_image.append(dead_info_image)
        # The progress bar advances by num_delete per step
        pbar.update(num_delete)

# Rebinds dead_image from the list of per-step records to finalise()'s result,
# which is built from the final live state and that list
dead_image = blackjax.ns.utils.finalise(live_image, dead_image)
ns_image_time = time.time() - ns_image_start

# | ### 5.7 Process Image Inference Results
# Column order of the samples table, with matching LaTeX labels for plots
columns_image = ['mu_x', 'mu_y', 'sigma_x', 'sigma_y', 'rho']
labels_image = [r'$\mu_x$', r'$\mu_y$', r'$\sigma_x$', r'$\sigma_y$', r'$\rho$']

# Stack one row per parameter, then transpose so each parameter becomes a
# column (assuming one 1-D array per parameter)
per_parameter = [dead_image.particles[name] for name in columns_image]
data_image = jnp.vstack(per_parameter).T

image_samples = NestedSamples(
    data_image,
    logL=dead_image.loglikelihood,
    logL_birth=dead_image.loglikelihood_birth,
    columns=columns_image,
    labels=labels_image,
    logzero=jnp.nan,
)


def _format_estimates(prefix, values):
    """Return `prefix` followed by comma-separated 'symbol=value' pairs (3 decimals)."""
    symbols = ('μₓ', 'μᵧ', 'σₓ', 'σᵧ', 'ρ')
    return prefix + ", ".join(f"{sym}={val:.3f}" for sym, val in zip(symbols, values))


print(f"Image inference results:")
print(f"Nested sampling runtime: {ns_image_time:.2f} seconds")

# Evidence estimate, and the spread over 100 logZ draws as its uncertainty
logz_estimate = image_samples.logZ()
logz_spread = image_samples.logZ(100).std()
print(f"Log Evidence: {logz_estimate:.2f} ± {logz_spread:.2f}")

# Truth, nested-sampling posterior means and gradient-descent result, side by side
print(_format_estimates("True parameters: ", [jax_true[name] for name in columns_image]))
print(_format_estimates("NS posterior means: ", [getattr(image_samples, name).mean() for name in columns_image]))
print(_format_estimates("GD final estimates: ", [final_params_gd[k] for k in range(5)]))

# | ### 5.8 Comparison Visualization
# 2x3 grid: five parameter traces plus one loss panel
fig, axes = plt.subplots(2, 3, figsize=(15, 8))

# Parameter evolution during gradient descent
param_labels = ['μₓ', 'μᵧ', 'σₓ', 'σᵧ', 'ρ']
jax_true_vals = [jax_true[name] for name in ('mu_x', 'mu_y', 'sigma_x', 'sigma_y', 'rho')]

for i, (label, truth) in enumerate(zip(param_labels, jax_true_vals)):
    # Fill the grid row by row
    row, col = divmod(i, 3)
    ax = axes[row, col]
    ax.plot(param_history[:, i], label='Gradient Descent', alpha=0.8)
    ax.axhline(truth, color='red', linestyle='--', alpha=0.8, label='True Value')
    ax.set_ylabel(label)
    ax.set_xlabel('Optimization Step')
    ax.legend()
    ax.grid(True, alpha=0.3)

# Loss evolution goes in the remaining bottom-right panel
loss_ax = axes[1, 2]
loss_ax.plot(losses, color='blue', alpha=0.8)
loss_ax.set_ylabel('Loss')
loss_ax.set_xlabel('Optimization Step')
loss_ax.set_title('Gradient Descent Convergence')
loss_ax.grid(True, alpha=0.3)

plt.tight_layout()
# Title is placed slightly above the axes area (y=1.02)
plt.suptitle("JAX Ecosystem: Gradient Descent vs Nested Sampling", y=1.02)

print("✓ JAX ecosystem integration complete!")
print("This demonstrates the complementary strengths of gradient-based and nested sampling approaches.")

# | ## Part 6: Simulation-Based Inference (SBI) Integration
# |
# | We demonstrate how nested sampling integrates with modern SBI techniques,
# | specifically neural posterior estimation (NPE) using JAX and simple neural networks.

from flax import linen as nn

print("Setting up SBI integration example with neural posterior estimation...")

# | ### 6.1 SBI Training Data Generation
# |
# | Generate a large dataset of (parameters, simulations) pairs for training
# | a neural network to approximate the posterior distribution.

print("Generating SBI training dataset...")

def generate_sbi_dataset(n_samples, rng_key):
    """Draw parameters uniformly from the prior box and simulate each one.

    Args:
        n_samples: number of (parameters, simulation) pairs to produce.
        rng_key: JAX PRNG key; split once into a prior key and a simulation key.

    Returns:
        Tuple `(params_sbi, simulations)`: an `(n_samples, 5)` parameter array
        and the result of `jax_simulator` vmapped over its rows, each row
        simulated with its own key.
    """
    key_prior, key_sim = jax.random.split(rng_key)

    # Prior box (same ranges as the image inference), in simulator argument order
    prior_ranges = {
        'mu_x': (-0.5, 0.5),
        'mu_y': (-0.5, 0.5),
        'sigma_x': (0.05, 0.3),
        'sigma_y': (0.05, 0.3),
        'rho': (-0.8, 0.8),
    }
    lower = jnp.array([lo for lo, _ in prior_ranges.values()])
    upper = jnp.array([hi for _, hi in prior_ranges.values()])

    # Unit-cube draws rescaled to the prior box
    unit_draws = jax.random.uniform(key_prior, (n_samples, 5))
    params_sbi = unit_draws * (upper - lower) + lower

    # One independent key per simulation, then a single vectorized, jitted call
    sim_keys = jax.random.split(key_sim, n_samples)
    batched_simulator = jax.jit(jax.vmap(jax_simulator))
    simulations = batched_simulator(params_sbi, sim_keys)

    return params_sbi, simulations

# Build the training set; 5000 pairs is a moderate size for the workshop
n_sbi_samples = 5000
# Take a one-off key for the dataset and carry the other half forward as rng_key
key, rng_key = jax.random.split(rng_key)
sbi_params, sbi_sims = generate_sbi_dataset(n_samples=n_sbi_samples, rng_key=key)

print(f"Generated SBI dataset: {n_sbi_samples} parameter-simulation pairs")
print(
    f"Parameter shape: {sbi_params.shape}, "
    f"Simulation shape: {sbi_sims.shape}"
)

# | ### 6.2 Neural Posterior Network
# |
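# | Define a simple neural network that maps a simulated image to a point estimate
# | of the five model parameters. This is a deliberately basic stand-in for neural
# | posterior estimation (NPE): it is trained with a mean-squared-error loss, so it
# | outputs a single parameter vector rather than a full posterior distribution.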

class NPENetwork(nn.Module):
    """Fully connected network from a batch of simulations to five outputs.

    Each input is flattened, passed through one Dense + ReLU layer per entry
    of `hidden_dims`, and projected to five values by a final Dense layer
    with no activation.
    """
    # Widths of the hidden Dense layers, applied in order
    hidden_dims: tuple = (64, 64)

    @nn.compact
    def __call__(self, x):
        """Apply the network to `x`, whose first axis is the batch axis."""
        # Collapse everything after the batch axis into one feature axis
        batch_size = x.shape[0]
        features = x.reshape((batch_size, -1))

        # Hidden stack: Dense followed by ReLU for each requested width
        for width in self.hidden_dims:
            features = nn.relu(nn.Dense(width)(features))

        # Linear read-out, one value per model parameter (5)
        return nn.Dense(5)(features)

# | ### 6.3 Training the Neural Posterior
print("Training neural posterior network...")

def train_npe_network(network, params_data, sims_data, n_epochs=300, learning_rate=1e-3):
    """Fit `network` to predict parameters from simulations with Adam and MSE loss.

    Args:
        network: object providing `init(rng, inputs)` and `apply(params, inputs)`.
        params_data: regression targets, one parameter vector per simulation.
        sims_data: network inputs; the first element is used to initialise the weights.
        n_epochs: number of full-batch update steps.
        learning_rate: step size passed to `optax.adam`.

    Returns:
        Tuple `(network_params, losses)`: the trained weights and a list with
        the loss value from each epoch.
    """
    # Weights are initialised from a fixed seed using a single-example batch
    init_key = jax.random.PRNGKey(42)
    network_params = network.init(init_key, sims_data[:1])

    optimizer = optax.adam(learning_rate)
    opt_state = optimizer.init(network_params)

    def mse_loss(weights, sims_batch, params_batch):
        """Mean squared error between predicted and target parameters."""
        error = network.apply(weights, sims_batch) - params_batch
        return jnp.mean(error**2)

    @jax.jit
    def train_step(weights, state, sims_batch, params_batch):
        """One Adam update; returns the new weights, optimizer state and loss."""
        loss, grads = jax.value_and_grad(mse_loss)(weights, sims_batch, params_batch)
        updates, state = optimizer.update(grads, state)
        return optax.apply_updates(weights, updates), state, loss

    # Every epoch is one update on the whole dataset (no mini-batching)
    losses = []
    for epoch in range(n_epochs):
        network_params, opt_state, loss = train_step(
            network_params, opt_state, sims_data, params_data
        )
        losses.append(loss)

        # Progress report every 50 epochs
        if epoch % 50 == 0:
            print(f"Epoch {epoch}: Loss = {loss:.6f}")

    return network_params, losses

# Train the network and time it
npe_network = NPENetwork()
npe_start = time.time()
trained_npe_params, npe_losses = train_npe_network(npe_network, sbi_params, sbi_sims)
# JAX dispatches computations asynchronously, so the training call can return
# before the last update steps have finished. Wait for the final weights so
# the measured time covers the whole run.
jax.block_until_ready(trained_npe_params)
npe_time = time.time() - npe_start

print(f"NPE training completed in {npe_time:.2f} seconds")

# | ### 6.4 SBI Posterior Inference
print("Performing SBI posterior inference on observed data...")

# Use the same observed image from Part 5
# `[None, ...]` prepends a batch axis of length 1: the network treats the first
# axis of its input as the batch axis and flattens everything after it
npe_posterior_params = npe_network.apply(trained_npe_params, observed_image[None, ...])
npe_prediction = npe_posterior_params[0]  # Remove batch dimension