Skip to content

Commit 06d1cb5

Browse files
committed
chore: code snippets
1 parent 4399371 commit 06d1cb5

File tree

1 file changed

+376
-0
lines changed

1 file changed

+376
-0
lines changed

_posts/2024-11-15-a critical examination of bayesian posteriors as test statistics.md

Lines changed: 376 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,7 @@ where $$k$$ is a constant of proportionality that may be ignored when comparing
5757
The likelihood function is not a probability distribution over $$\theta$$; rather, it serves as a tool for estimation and hypothesis testing. It allows us to identify parameter values that make the observed data most plausible.
5858

5959
### Bayesian Posterior Distributions
60+
6061
In Bayesian statistics, the posterior distribution represents the updated belief about the parameter $$\theta$$ after observing data $$x$$. It is derived using Bayes' theorem:
6162

6263
$$
@@ -249,3 +250,378 @@ By maintaining clarity, precision, and a thorough understanding of the tools at
249250
- **Be Mindful of Prior Information:** When using Bayesian methods, carefully select priors and assess their influence on the posterior distribution.
250251
- **Consider the Practical Implications:** Choose statistical methods that are tractable and provide clear, actionable insights.
251252
- **Stay Informed of Methodological Debates:** Engage with the ongoing discourse between Bayesian and frequentist methodologies to enhance understanding and application.
253+
254+
## Appendix
255+
256+
### Python Code for Bayesian Posterior and Test Statistics
257+
258+
```python
259+
# Import necessary libraries
260+
import numpy as np
261+
from scipy import stats
262+
import matplotlib.pyplot as plt
263+
264+
# Define a prior distribution (uniform prior)
265+
def prior(theta):
266+
return 1 if 0 <= theta <= 1 else 0
267+
268+
# Define the likelihood function
269+
def likelihood(theta, data):
270+
return np.prod(stats.binom.pmf(data, n=1, p=theta))
271+
272+
# Define the posterior using Bayes' theorem
273+
def posterior(theta, data):
274+
return likelihood(theta, data) * prior(theta)
275+
276+
# Normalize the posterior to ensure it integrates to 1
277+
def normalized_posterior(data):
278+
theta_range = np.linspace(0, 1, 100)
279+
posterior_values = np.array([posterior(theta, data) for theta in theta_range])
280+
normalization_constant = np.trapz(posterior_values, theta_range)
281+
return theta_range, posterior_values / normalization_constant
282+
283+
# Plot the posterior distribution
284+
def plot_posterior(data):
285+
theta_range, norm_posterior = normalized_posterior(data)
286+
plt.plot(theta_range, norm_posterior, label='Posterior')
287+
plt.title('Posterior Distribution')
288+
plt.xlabel('Theta')
289+
plt.ylabel('Density')
290+
plt.legend()
291+
plt.show()
292+
293+
# Simulate data (e.g., Bernoulli trials with true parameter 0.7)
294+
data = np.random.binomial(1, 0.7, size=20)
295+
296+
# Plot the posterior for the given data
297+
plot_posterior(data)
298+
299+
# Compute the test statistics (mean, variance, etc.)
300+
mean_posterior = np.trapz(theta_range * norm_posterior, theta_range)
301+
variance_posterior = np.trapz((theta_range - mean_posterior) ** 2 * norm_posterior, theta_range)
302+
credible_interval = np.percentile(theta_range, [2.5, 97.5])
303+
304+
# Print posterior mean, variance, and credible interval
305+
print(f"Posterior Mean: {mean_posterior}")
306+
print(f"Posterior Variance: {variance_posterior}")
307+
print(f"95% Credible Interval: {credible_interval}")
308+
309+
# Frequentist Test Statistics Example: Likelihood Ratio Test
310+
def likelihood_ratio_test(data, theta_null, theta_alt):
311+
ll_null = np.sum(np.log(stats.binom.pmf(data, n=1, p=theta_null)))
312+
ll_alt = np.sum(np.log(stats.binom.pmf(data, n=1, p=theta_alt)))
313+
return 2 * (ll_alt - ll_null)
314+
315+
# Perform a likelihood ratio test for two values of theta
316+
lr_stat = likelihood_ratio_test(data, theta_null=0.5, theta_alt=0.7)
317+
p_value = stats.chi2.sf(lr_stat, df=1)
318+
print(f"Likelihood Ratio Test Statistic: {lr_stat}")
319+
print(f"p-value: {p_value}")
320+
```
321+
322+
### R Code for Bayesian Posterior and Test Statistics
323+
324+
```r
325+
# Load necessary libraries
326+
library(ggplot2)
327+
328+
# Define a uniform prior
329+
prior <- function(theta) {
330+
ifelse(theta >= 0 & theta <= 1, 1, 0)
331+
}
332+
333+
# Define the likelihood function (Bernoulli trials)
334+
likelihood <- function(theta, data) {
335+
prod(dbinom(data, size = 1, prob = theta))
336+
}
337+
338+
# Define the posterior function using Bayes' theorem
339+
posterior <- function(theta, data) {
340+
likelihood(theta, data) * prior(theta)
341+
}
342+
343+
# Normalize the posterior distribution
344+
normalized_posterior <- function(data) {
345+
theta_range <- seq(0, 1, length.out = 100)
346+
posterior_values <- sapply(theta_range, posterior, data = data)
347+
normalization_constant <- sum(posterior_values) * diff(range(theta_range)) / length(theta_range)
348+
list(theta_range = theta_range, posterior_values = posterior_values / normalization_constant)
349+
}
350+
351+
# Plot the posterior distribution
352+
plot_posterior <- function(data) {
353+
result <- normalized_posterior(data)
354+
df <- data.frame(theta = result$theta_range, posterior = result$posterior_values)
355+
356+
ggplot(df, aes(x = theta, y = posterior)) +
357+
geom_line() +
358+
labs(title = "Posterior Distribution", x = "Theta", y = "Density") +
359+
theme_minimal()
360+
}
361+
362+
# Simulate data (e.g., Bernoulli trials with true parameter 0.7)
363+
set.seed(123)
364+
data <- rbinom(20, size = 1, prob = 0.7)
365+
366+
# Plot the posterior for the given data
367+
plot_posterior(data)
368+
369+
# Compute posterior mean, variance, and credible interval
370+
posterior_summary <- function(data) {
371+
result <- normalized_posterior(data)
372+
theta_range <- result$theta_range
373+
posterior_values <- result$posterior_values
374+
375+
mean_posterior <- sum(theta_range * posterior_values) * diff(range(theta_range)) / length(theta_range)
376+
variance_posterior <- sum((theta_range - mean_posterior)^2 * posterior_values) * diff(range(theta_range)) / length(theta_range)
377+
credible_interval <- quantile(theta_range, c(0.025, 0.975))
378+
379+
list(mean = mean_posterior, variance = variance_posterior, credible_interval = credible_interval)
380+
}
381+
382+
# Compute and print posterior summary statistics
383+
summary_stats <- posterior_summary(data)
384+
print(paste("Posterior Mean:", summary_stats$mean))
385+
print(paste("Posterior Variance:", summary_stats$variance))
386+
print(paste("95% Credible Interval:", paste(summary_stats$credible_interval, collapse = " - ")))
387+
388+
# Frequentist Test Statistics Example: Likelihood Ratio Test
389+
likelihood_ratio_test <- function(data, theta_null, theta_alt) {
390+
ll_null <- sum(dbinom(data, size = 1, prob = theta_null, log = TRUE))
391+
ll_alt <- sum(dbinom(data, size = 1, prob = theta_alt, log = TRUE))
392+
test_stat <- 2 * (ll_alt - ll_null)
393+
p_value <- 1 - pchisq(test_stat, df = 1)
394+
list(test_stat = test_stat, p_value = p_value)
395+
}
396+
397+
# Perform a likelihood ratio test for two values of theta
398+
lr_test_result <- likelihood_ratio_test(data, theta_null = 0.5, theta_alt = 0.7)
399+
print(paste("Likelihood Ratio Test Statistic:", lr_test_result$test_stat))
400+
print(paste("p-value:", lr_test_result$p_value))
401+
```
402+
403+
### Scala Code for Bayesian Posterior and Test Statistics
404+
405+
```scala
406+
// Import necessary libraries
407+
import breeze.stats.distributions._
408+
import breeze.linalg._
409+
import breeze.plot._
410+
import scala.math._
411+
412+
// Define a uniform prior
413+
def prior(theta: Double): Double = {
414+
if (theta >= 0 && theta <= 1) 1.0 else 0.0
415+
}
416+
417+
// Define the likelihood function (Bernoulli trials)
418+
def likelihood(theta: Double, data: Seq[Int]): Double = {
419+
data.map(x => pow(theta, x) * pow(1 - theta, 1 - x)).product
420+
}
421+
422+
// Define the posterior function using Bayes' theorem
423+
def posterior(theta: Double, data: Seq[Int]): Double = {
424+
likelihood(theta, data) * prior(theta)
425+
}
426+
427+
// Normalize the posterior distribution
428+
def normalizedPosterior(data: Seq[Int]): (DenseVector[Double], DenseVector[Double]) = {
429+
val thetaRange = linspace(0.0, 1.0, 100)
430+
val posteriorValues = DenseVector(thetaRange.map(posterior(_, data)).toArray)
431+
val normalizationConstant = sum(posteriorValues) * (thetaRange(1) - thetaRange(0))
432+
(thetaRange, posteriorValues / normalizationConstant)
433+
}
434+
435+
// Plot the posterior distribution
436+
def plotPosterior(data: Seq[Int]): Unit = {
437+
val (thetaRange, normPosterior) = normalizedPosterior(data)
438+
val f = Figure()
439+
val p = f.subplot(0)
440+
p += plot(thetaRange, normPosterior)
441+
p.title = "Posterior Distribution"
442+
p.xlabel = "Theta"
443+
p.ylabel = "Density"
444+
f.saveas("posterior_plot.png")
445+
}
446+
447+
// Simulate data (e.g., Bernoulli trials with true parameter 0.7)
448+
val data = Seq.fill(20)(if (Gaussian(0.7, 0.15).draw() > 0.5) 1 else 0)
449+
450+
// Plot the posterior for the given data
451+
plotPosterior(data)
452+
453+
// Compute posterior mean, variance, and credible interval
454+
def posteriorSummary(data: Seq[Int]): (Double, Double, (Double, Double)) = {
455+
val (thetaRange, normPosterior) = normalizedPosterior(data)
456+
val meanPosterior = sum(thetaRange *:* normPosterior) * (thetaRange(1) - thetaRange(0))
457+
val variancePosterior = sum(pow(thetaRange - meanPosterior, 2) *:* normPosterior) * (thetaRange(1) - thetaRange(0))
458+
val credibleInterval = (thetaRange(2), thetaRange(97))
459+
(meanPosterior, variancePosterior, credibleInterval)
460+
}
461+
462+
// Compute and print posterior summary statistics
463+
val (mean, variance, credibleInterval) = posteriorSummary(data)
464+
println(s"Posterior Mean: $mean")
465+
println(s"Posterior Variance: $variance")
466+
println(s"95% Credible Interval: ${credibleInterval._1} - ${credibleInterval._2}")
467+
468+
// Frequentist Test Statistics Example: Likelihood Ratio Test
469+
def likelihoodRatioTest(data: Seq[Int], thetaNull: Double, thetaAlt: Double): (Double, Double) = {
470+
val logLikelihoodNull = data.map(x => x * log(thetaNull) + (1 - x) * log(1 - thetaNull)).sum
471+
val logLikelihoodAlt = data.map(x => x * log(thetaAlt) + (1 - x) * log(1 - thetaAlt)).sum
472+
val testStat = 2 * (logLikelihoodAlt - logLikelihoodNull)
473+
val pValue = 1 - breeze.stats.distributions.ChiSquared(1).cdf(testStat)
474+
(testStat, pValue)
475+
}
476+
477+
// Perform a likelihood ratio test for two values of theta
478+
val (lrStat, pValue) = likelihoodRatioTest(data, thetaNull = 0.5, thetaAlt = 0.7)
479+
println(s"Likelihood Ratio Test Statistic: $lrStat")
480+
println(s"p-value: $pValue")
481+
```
482+
483+
### Go Code for Bayesian Posterior and Test Statistics
484+
485+
```go
486+
package main
487+
488+
import (
489+
"fmt"
490+
"math"
491+
"math/rand"
492+
"sort"
493+
494+
"gonum.org/v1/gonum/stat/distuv"
495+
"gonum.org/v1/plot"
496+
"gonum.org/v1/plot/plotter"
497+
"gonum.org/v1/plot/vg"
498+
)
499+
500+
// Define the prior function (uniform prior)
501+
func prior(theta float64) float64 {
502+
if theta >= 0 && theta <= 1 {
503+
return 1
504+
}
505+
return 0
506+
}
507+
508+
// Define the likelihood function (Bernoulli trials)
509+
func likelihood(theta float64, data []int) float64 {
510+
likelihood := 1.0
511+
for _, x := range data {
512+
likelihood *= math.Pow(theta, float64(x)) * math.Pow(1-theta, float64(1-x))
513+
}
514+
return likelihood
515+
}
516+
517+
// Define the posterior function using Bayes' theorem
518+
func posterior(theta float64, data []int) float64 {
519+
return likelihood(theta, data) * prior(theta)
520+
}
521+
522+
// Normalize the posterior distribution
523+
func normalizedPosterior(data []int) ([]float64, []float64) {
524+
thetas := make([]float64, 100)
525+
posteriors := make([]float64, 100)
526+
sumPosterior := 0.0
527+
528+
for i := 0; i < 100; i++ {
529+
theta := float64(i) / 100
530+
thetas[i] = theta
531+
post := posterior(theta, data)
532+
posteriors[i] = post
533+
sumPosterior += post
534+
}
535+
536+
for i := range posteriors {
537+
posteriors[i] /= sumPosterior
538+
}
539+
540+
return thetas, posteriors
541+
}
542+
543+
// Plot the posterior distribution
544+
func plotPosterior(data []int) {
545+
thetas, posteriors := normalizedPosterior(data)
546+
547+
p, _ := plot.New()
548+
p.Title.Text = "Posterior Distribution"
549+
p.X.Label.Text = "Theta"
550+
p.Y.Label.Text = "Density"
551+
552+
pts := make(plotter.XYs, len(thetas))
553+
for i := range thetas {
554+
pts[i].X = thetas[i]
555+
pts[i].Y = posteriors[i]
556+
}
557+
558+
line, _ := plotter.NewLine(pts)
559+
p.Add(line)
560+
p.Save(4*vg.Inch, 4*vg.Inch, "posterior.png")
561+
}
562+
563+
// Simulate data (e.g., Bernoulli trials with true parameter 0.7)
564+
func simulateData(size int, prob float64) []int {
565+
data := make([]int, size)
566+
for i := range data {
567+
if rand.Float64() < prob {
568+
data[i] = 1
569+
} else {
570+
data[i] = 0
571+
}
572+
}
573+
return data
574+
}
575+
576+
// Compute posterior mean, variance, and credible interval
577+
func posteriorSummary(data []int) (float64, float64, [2]float64) {
578+
thetas, posteriors := normalizedPosterior(data)
579+
580+
meanPosterior := 0.0
581+
for i := range thetas {
582+
meanPosterior += thetas[i] * posteriors[i]
583+
}
584+
585+
variancePosterior := 0.0
586+
for i := range thetas {
587+
variancePosterior += math.Pow(thetas[i]-meanPosterior, 2) * posteriors[i]
588+
}
589+
590+
credibleInterval := [2]float64{thetas[2], thetas[97]}
591+
return meanPosterior, variancePosterior, credibleInterval
592+
}
593+
594+
// Likelihood ratio test
595+
func likelihoodRatioTest(data []int, thetaNull, thetaAlt float64) (float64, float64) {
596+
llNull := 0.0
597+
llAlt := 0.0
598+
599+
for _, x := range data {
600+
llNull += float64(x)*math.Log(thetaNull) + float64(1-x)*math.Log(1-thetaNull)
601+
llAlt += float64(x)*math.Log(thetaAlt) + float64(1-x)*math.Log(1-thetaAlt)
602+
}
603+
604+
testStat := 2 * (llAlt - llNull)
605+
pValue := 1 - distuv.ChiSquared{K: 1}.CDF(testStat)
606+
return testStat, pValue
607+
}
608+
609+
func main() {
610+
// Simulate data
611+
data := simulateData(20, 0.7)
612+
613+
// Plot posterior distribution
614+
plotPosterior(data)
615+
616+
// Compute and print posterior summary statistics
617+
mean, variance, credibleInterval := posteriorSummary(data)
618+
fmt.Printf("Posterior Mean: %.4f\n", mean)
619+
fmt.Printf("Posterior Variance: %.4f\n", variance)
620+
fmt.Printf("95%% Credible Interval: [%.4f, %.4f]\n", credibleInterval[0], credibleInterval[1])
621+
622+
// Perform likelihood ratio test
623+
lrStat, pValue := likelihoodRatioTest(data, 0.5, 0.7)
624+
fmt.Printf("Likelihood Ratio Test Statistic: %.4f\n", lrStat)
625+
fmt.Printf("p-value: %.4f\n", pValue)
626+
}
627+
```

0 commit comments

Comments
 (0)