@@ -57,6 +57,7 @@ where $$k$$ is a constant of proportionality that may be ignored when comparing
5757The likelihood function is not a probability distribution over $$ \theta $$ ; rather, it serves as a tool for estimation and hypothesis testing. It allows us to identify parameter values that make the observed data most plausible.
5858
5959### Bayesian Posterior Distributions
60+
6061In Bayesian statistics, the posterior distribution represents the updated belief about the parameter $$ \theta $$ after observing data $$ x $$ . It is derived using Bayes' theorem:
6162
6263$$
@@ -249,3 +250,378 @@ By maintaining clarity, precision, and a thorough understanding of the tools at
249250- ** Be Mindful of Prior Information:** When using Bayesian methods, carefully select priors and assess their influence on the posterior distribution.
250251- ** Consider the Practical Implications:** Choose statistical methods that are tractable and provide clear, actionable insights.
251252- ** Stay Informed of Methodological Debates:** Engage with the ongoing discourse between Bayesian and frequentist methodologies to enhance understanding and application.
253+
254+ ## Appendix
255+
256+ ### Python Code for Bayesian Posterior and Test Statistics
257+
258+ ``` python
259+ # Import necessary libraries
260+ import numpy as np
261+ from scipy import stats
262+ import matplotlib.pyplot as plt
263+
264+ # Define a prior distribution (uniform prior)
265+ def prior (theta ):
266+ return 1 if 0 <= theta <= 1 else 0
267+
268+ # Define the likelihood function
269+ def likelihood (theta , data ):
270+ return np.prod(stats.binom.pmf(data, n = 1 , p = theta))
271+
272+ # Define the posterior using Bayes' theorem
273+ def posterior (theta , data ):
274+ return likelihood(theta, data) * prior(theta)
275+
276+ # Normalize the posterior to ensure it integrates to 1
277+ def normalized_posterior (data ):
278+ theta_range = np.linspace(0 , 1 , 100 )
279+ posterior_values = np.array([posterior(theta, data) for theta in theta_range])
280+ normalization_constant = np.trapz(posterior_values, theta_range)
281+ return theta_range, posterior_values / normalization_constant
282+
283+ # Plot the posterior distribution
284+ def plot_posterior (data ):
285+ theta_range, norm_posterior = normalized_posterior(data)
286+ plt.plot(theta_range, norm_posterior, label = ' Posterior' )
287+ plt.title(' Posterior Distribution' )
288+ plt.xlabel(' Theta' )
289+ plt.ylabel(' Density' )
290+ plt.legend()
291+ plt.show()
292+
293+ # Simulate data (e.g., Bernoulli trials with true parameter 0.7)
294+ data = np.random.binomial(1 , 0.7 , size = 20 )
295+
296+ # Plot the posterior for the given data
297+ plot_posterior(data)
298+
299+ # Compute the test statistics (mean, variance, etc.)
300+ mean_posterior = np.trapz(theta_range * norm_posterior, theta_range)
301+ variance_posterior = np.trapz((theta_range - mean_posterior) ** 2 * norm_posterior, theta_range)
302+ credible_interval = np.percentile(theta_range, [2.5 , 97.5 ])
303+
304+ # Print posterior mean, variance, and credible interval
305+ print (f " Posterior Mean: { mean_posterior} " )
306+ print (f " Posterior Variance: { variance_posterior} " )
307+ print (f " 95% Credible Interval: { credible_interval} " )
308+
309+ # Frequentist Test Statistics Example: Likelihood Ratio Test
310+ def likelihood_ratio_test (data , theta_null , theta_alt ):
311+ ll_null = np.sum(np.log(stats.binom.pmf(data, n = 1 , p = theta_null)))
312+ ll_alt = np.sum(np.log(stats.binom.pmf(data, n = 1 , p = theta_alt)))
313+ return 2 * (ll_alt - ll_null)
314+
315+ # Perform a likelihood ratio test for two values of theta
316+ lr_stat = likelihood_ratio_test(data, theta_null = 0.5 , theta_alt = 0.7 )
317+ p_value = stats.chi2.sf(lr_stat, df = 1 )
318+ print (f " Likelihood Ratio Test Statistic: { lr_stat} " )
319+ print (f " p-value: { p_value} " )
320+ ```
321+
322+ ### R Code for Bayesian Posterior and Test Statistics
323+
324+ ``` r
325+ # Load necessary libraries
326+ library(ggplot2 )
327+
328+ # Define a uniform prior
329+ prior <- function (theta ) {
330+ ifelse(theta > = 0 & theta < = 1 , 1 , 0 )
331+ }
332+
333+ # Define the likelihood function (Bernoulli trials)
334+ likelihood <- function (theta , data ) {
335+ prod(dbinom(data , size = 1 , prob = theta ))
336+ }
337+
338+ # Define the posterior function using Bayes' theorem
339+ posterior <- function (theta , data ) {
340+ likelihood(theta , data ) * prior(theta )
341+ }
342+
343+ # Normalize the posterior distribution
344+ normalized_posterior <- function (data ) {
345+ theta_range <- seq(0 , 1 , length.out = 100 )
346+ posterior_values <- sapply(theta_range , posterior , data = data )
347+ normalization_constant <- sum(posterior_values ) * diff(range(theta_range )) / length(theta_range )
348+ list (theta_range = theta_range , posterior_values = posterior_values / normalization_constant )
349+ }
350+
351+ # Plot the posterior distribution
352+ plot_posterior <- function (data ) {
353+ result <- normalized_posterior(data )
354+ df <- data.frame (theta = result $ theta_range , posterior = result $ posterior_values )
355+
356+ ggplot(df , aes(x = theta , y = posterior )) +
357+ geom_line() +
358+ labs(title = " Posterior Distribution" , x = " Theta" , y = " Density" ) +
359+ theme_minimal()
360+ }
361+
362+ # Simulate data (e.g., Bernoulli trials with true parameter 0.7)
363+ set.seed(123 )
364+ data <- rbinom(20 , size = 1 , prob = 0.7 )
365+
366+ # Plot the posterior for the given data
367+ plot_posterior(data )
368+
369+ # Compute posterior mean, variance, and credible interval
370+ posterior_summary <- function (data ) {
371+ result <- normalized_posterior(data )
372+ theta_range <- result $ theta_range
373+ posterior_values <- result $ posterior_values
374+
375+ mean_posterior <- sum(theta_range * posterior_values ) * diff(range(theta_range )) / length(theta_range )
376+ variance_posterior <- sum((theta_range - mean_posterior )^ 2 * posterior_values ) * diff(range(theta_range )) / length(theta_range )
377+ credible_interval <- quantile(theta_range , c(0.025 , 0.975 ))
378+
379+ list (mean = mean_posterior , variance = variance_posterior , credible_interval = credible_interval )
380+ }
381+
382+ # Compute and print posterior summary statistics
383+ summary_stats <- posterior_summary(data )
384+ print(paste(" Posterior Mean:" , summary_stats $ mean ))
385+ print(paste(" Posterior Variance:" , summary_stats $ variance ))
386+ print(paste(" 95% Credible Interval:" , paste(summary_stats $ credible_interval , collapse = " - " )))
387+
388+ # Frequentist Test Statistics Example: Likelihood Ratio Test
389+ likelihood_ratio_test <- function (data , theta_null , theta_alt ) {
390+ ll_null <- sum(dbinom(data , size = 1 , prob = theta_null , log = TRUE ))
391+ ll_alt <- sum(dbinom(data , size = 1 , prob = theta_alt , log = TRUE ))
392+ test_stat <- 2 * (ll_alt - ll_null )
393+ p_value <- 1 - pchisq(test_stat , df = 1 )
394+ list (test_stat = test_stat , p_value = p_value )
395+ }
396+
397+ # Perform a likelihood ratio test for two values of theta
398+ lr_test_result <- likelihood_ratio_test(data , theta_null = 0.5 , theta_alt = 0.7 )
399+ print(paste(" Likelihood Ratio Test Statistic:" , lr_test_result $ test_stat ))
400+ print(paste(" p-value:" , lr_test_result $ p_value ))
401+ ```
402+
403+ ### Scala Code for Bayesian Posterior and Test Statistics
404+
405+ ``` scala
406+ // Import necessary libraries
407+ import breeze .stats .distributions ._
408+ import breeze .linalg ._
409+ import breeze .plot ._
410+ import scala .math ._
411+
412+ // Define a uniform prior
413+ def prior (theta : Double ): Double = {
414+ if (theta >= 0 && theta <= 1 ) 1.0 else 0.0
415+ }
416+
417+ // Define the likelihood function (Bernoulli trials)
418+ def likelihood (theta : Double , data : Seq [Int ]): Double = {
419+ data.map(x => pow(theta, x) * pow(1 - theta, 1 - x)).product
420+ }
421+
422+ // Define the posterior function using Bayes' theorem
423+ def posterior (theta : Double , data : Seq [Int ]): Double = {
424+ likelihood(theta, data) * prior(theta)
425+ }
426+
427+ // Normalize the posterior distribution
428+ def normalizedPosterior (data : Seq [Int ]): (DenseVector [Double ], DenseVector [Double ]) = {
429+ val thetaRange = linspace(0.0 , 1.0 , 100 )
430+ val posteriorValues = DenseVector (thetaRange.map(posterior(_, data)).toArray)
431+ val normalizationConstant = sum(posteriorValues) * (thetaRange(1 ) - thetaRange(0 ))
432+ (thetaRange, posteriorValues / normalizationConstant)
433+ }
434+
435+ // Plot the posterior distribution
436+ def plotPosterior (data : Seq [Int ]): Unit = {
437+ val (thetaRange, normPosterior) = normalizedPosterior(data)
438+ val f = Figure ()
439+ val p = f.subplot(0 )
440+ p += plot(thetaRange, normPosterior)
441+ p.title = " Posterior Distribution"
442+ p.xlabel = " Theta"
443+ p.ylabel = " Density"
444+ f.saveas(" posterior_plot.png" )
445+ }
446+
447+ // Simulate data (e.g., Bernoulli trials with true parameter 0.7)
448+ val data = Seq .fill(20 )(if (Gaussian (0.7 , 0.15 ).draw() > 0.5 ) 1 else 0 )
449+
450+ // Plot the posterior for the given data
451+ plotPosterior(data)
452+
453+ // Compute posterior mean, variance, and credible interval
454+ def posteriorSummary (data : Seq [Int ]): (Double , Double , (Double , Double )) = {
455+ val (thetaRange, normPosterior) = normalizedPosterior(data)
456+ val meanPosterior = sum(thetaRange *:* normPosterior) * (thetaRange(1 ) - thetaRange(0 ))
457+ val variancePosterior = sum(pow(thetaRange - meanPosterior, 2 ) *:* normPosterior) * (thetaRange(1 ) - thetaRange(0 ))
458+ val credibleInterval = (thetaRange(2 ), thetaRange(97 ))
459+ (meanPosterior, variancePosterior, credibleInterval)
460+ }
461+
462+ // Compute and print posterior summary statistics
463+ val (mean, variance, credibleInterval) = posteriorSummary(data)
464+ println(s " Posterior Mean: $mean" )
465+ println(s " Posterior Variance: $variance" )
466+ println(s " 95% Credible Interval: ${credibleInterval._1} - ${credibleInterval._2}" )
467+
468+ // Frequentist Test Statistics Example: Likelihood Ratio Test
469+ def likelihoodRatioTest (data : Seq [Int ], thetaNull : Double , thetaAlt : Double ): (Double , Double ) = {
470+ val logLikelihoodNull = data.map(x => x * log(thetaNull) + (1 - x) * log(1 - thetaNull)).sum
471+ val logLikelihoodAlt = data.map(x => x * log(thetaAlt) + (1 - x) * log(1 - thetaAlt)).sum
472+ val testStat = 2 * (logLikelihoodAlt - logLikelihoodNull)
473+ val pValue = 1 - breeze.stats.distributions.ChiSquared (1 ).cdf(testStat)
474+ (testStat, pValue)
475+ }
476+
477+ // Perform a likelihood ratio test for two values of theta
478+ val (lrStat, pValue) = likelihoodRatioTest(data, thetaNull = 0.5 , thetaAlt = 0.7 )
479+ println(s " Likelihood Ratio Test Statistic: $lrStat" )
480+ println(s " p-value: $pValue" )
481+ ```
482+
483+ ### Go Code for Bayesian Posterior and Test Statistics
484+
485+ ``` go
486+ package main
487+
488+ import (
489+ " fmt"
490+ " math"
491+ " math/rand"
492+ " sort"
493+
494+ " gonum.org/v1/gonum/stat/distuv"
495+ " gonum.org/v1/plot"
496+ " gonum.org/v1/plot/plotter"
497+ " gonum.org/v1/plot/vg"
498+ )
499+
500+ // Define the prior function (uniform prior)
501+ func prior (theta float64 ) float64 {
502+ if theta >= 0 && theta <= 1 {
503+ return 1
504+ }
505+ return 0
506+ }
507+
508+ // Define the likelihood function (Bernoulli trials)
509+ func likelihood (theta float64 , data []int ) float64 {
510+ likelihood := 1.0
511+ for _ , x := range data {
512+ likelihood *= math.Pow (theta, float64 (x)) * math.Pow (1 -theta, float64 (1 -x))
513+ }
514+ return likelihood
515+ }
516+
517+ // Define the posterior function using Bayes' theorem
518+ func posterior (theta float64 , data []int ) float64 {
519+ return likelihood (theta, data) * prior (theta)
520+ }
521+
522+ // Normalize the posterior distribution
523+ func normalizedPosterior (data []int ) ([]float64 , []float64 ) {
524+ thetas := make ([]float64 , 100 )
525+ posteriors := make ([]float64 , 100 )
526+ sumPosterior := 0.0
527+
528+ for i := 0 ; i < 100 ; i++ {
529+ theta := float64 (i) / 100
530+ thetas[i] = theta
531+ post := posterior (theta, data)
532+ posteriors[i] = post
533+ sumPosterior += post
534+ }
535+
536+ for i := range posteriors {
537+ posteriors[i] /= sumPosterior
538+ }
539+
540+ return thetas, posteriors
541+ }
542+
543+ // Plot the posterior distribution
544+ func plotPosterior (data []int ) {
545+ thetas , posteriors := normalizedPosterior (data)
546+
547+ p , _ := plot.New ()
548+ p.Title .Text = " Posterior Distribution"
549+ p.X .Label .Text = " Theta"
550+ p.Y .Label .Text = " Density"
551+
552+ pts := make (plotter.XYs , len (thetas))
553+ for i := range thetas {
554+ pts[i].X = thetas[i]
555+ pts[i].Y = posteriors[i]
556+ }
557+
558+ line , _ := plotter.NewLine (pts)
559+ p.Add (line)
560+ p.Save (4 *vg.Inch , 4 *vg.Inch , " posterior.png" )
561+ }
562+
563+ // Simulate data (e.g., Bernoulli trials with true parameter 0.7)
564+ func simulateData (size int , prob float64 ) []int {
565+ data := make ([]int , size)
566+ for i := range data {
567+ if rand.Float64 () < prob {
568+ data[i] = 1
569+ } else {
570+ data[i] = 0
571+ }
572+ }
573+ return data
574+ }
575+
576+ // Compute posterior mean, variance, and credible interval
577+ func posteriorSummary (data []int ) (float64 , float64 , [2]float64 ) {
578+ thetas , posteriors := normalizedPosterior (data)
579+
580+ meanPosterior := 0.0
581+ for i := range thetas {
582+ meanPosterior += thetas[i] * posteriors[i]
583+ }
584+
585+ variancePosterior := 0.0
586+ for i := range thetas {
587+ variancePosterior += math.Pow (thetas[i]-meanPosterior, 2 ) * posteriors[i]
588+ }
589+
590+ credibleInterval := [2 ]float64 {thetas[2 ], thetas[97 ]}
591+ return meanPosterior, variancePosterior, credibleInterval
592+ }
593+
594+ // Likelihood ratio test
595+ func likelihoodRatioTest (data []int , thetaNull , thetaAlt float64 ) (float64 , float64 ) {
596+ llNull := 0.0
597+ llAlt := 0.0
598+
599+ for _ , x := range data {
600+ llNull += float64 (x)*math.Log (thetaNull) + float64 (1 -x)*math.Log (1 -thetaNull)
601+ llAlt += float64 (x)*math.Log (thetaAlt) + float64 (1 -x)*math.Log (1 -thetaAlt)
602+ }
603+
604+ testStat := 2 * (llAlt - llNull)
605+ pValue := 1 - distuv.ChiSquared {K: 1 }.CDF (testStat)
606+ return testStat, pValue
607+ }
608+
609+ func main () {
610+ // Simulate data
611+ data := simulateData (20 , 0.7 )
612+
613+ // Plot posterior distribution
614+ plotPosterior (data)
615+
616+ // Compute and print posterior summary statistics
617+ mean , variance , credibleInterval := posteriorSummary (data)
618+ fmt.Printf (" Posterior Mean: %.4f \n " , mean)
619+ fmt.Printf (" Posterior Variance: %.4f \n " , variance)
620+ fmt.Printf (" 95%% Credible Interval: [%.4f , %.4f ]\n " , credibleInterval[0 ], credibleInterval[1 ])
621+
622+ // Perform likelihood ratio test
623+ lrStat , pValue := likelihoodRatioTest (data, 0.5 , 0.7 )
624+ fmt.Printf (" Likelihood Ratio Test Statistic: %.4f \n " , lrStat)
625+ fmt.Printf (" p-value: %.4f \n " , pValue)
626+ }
627+ ```
0 commit comments