@@ -20,6 +20,8 @@ keywords:
2020- python
2121- r
2222- ruby
23+ - scala
24+ - go
2325seo_description : An in-depth exploration of normality tests, their limitations, and the importance of visual inspection for assessing whether data follow a normal distribution.
2426seo_title : ' Understanding Normality Tests: A Deep Dive'
2527seo_type : article
3133- python
3234- r
3335- ruby
36+ - scala
37+ - go
3438title : ' Understanding Normality Tests: A Deep Dive into Their Power and Limitations'
3539---
3640
@@ -481,3 +485,229 @@ sd = Math.sqrt(data.map { |x| (x - data.mean) ** 2 }.sum / data.size)
481485geary_ratio = mad / sd
482486puts " Geary's Kurtosis: #{ geary_ratio } "
483487```
488+
489+ # Appendix: Scala Code for Normality Tests
490+
491+ ``` scala
492+ // Import necessary libraries
493+ import breeze .stats .distributions ._
494+ import breeze .plot ._
495+ import org .apache .commons .math3 .stat .descriptive ._
496+ import org .apache .commons .math3 .stat .inference ._
497+ import org .apache .commons .math3 .stat .StatUtils
498+
499+ // Generate the special bimodal distribution
500+ def generateBimodalDistribution (size : Int = 1000 ): Array [Double ] = {
501+ val dist1 = Gaussian (0 , 1 ).sample(size / 2 )
502+ val dist2 = Gaussian (3 , 0.5 ).sample(size / 2 )
503+ dist1 ++ dist2
504+ }
505+
506+ // Generate data
507+ val data = generateBimodalDistribution()
508+
509+ // QQ plot (using Breeze)
510+ val f = Figure ()
511+ val p = f.subplot(0 )
512+ p += plot(Gaussian (0 , 1 ).sample(data.length).sorted, data.sorted)
513+ p.title = " QQ Plot"
514+ f.saveas(" qq_plot.png" )
515+
516+ // Empirical CDF vs Theoretical CDF
517+ val empiricalCDF = data.sorted.zipWithIndex.map { case (value, index) =>
518+ (value, (index + 1 ).toDouble / data.length)
519+ }
520+ val theoreticalCDF = data.sorted.map(x => (x, Gaussian (data.mean, data.stdDev).cdf(x)))
521+
522+ val f2 = Figure ()
523+ val p2 = f2.subplot(0 )
524+ p2 += plot(empiricalCDF.map(_._1), empiricalCDF.map(_._2), name = " Empirical CDF" )
525+ p2 += plot(theoreticalCDF.map(_._1), theoreticalCDF.map(_._2), name = " Theoretical CDF" , style = '-' )
526+ p2.title = " Empirical CDF vs Theoretical CDF"
527+ f2.saveas(" cdf_plot.png" )
528+
529+ // Shapiro-Wilk test (via Apache Commons Math3)
530+ val shapiroTest = new org.apache.commons.math3.stat.inference.ShapiroWilkTest ()
531+ val shapiroP = shapiroTest.test(data)
532+ println(s " Shapiro-Wilk Test: p-value = $shapiroP" )
533+
534+ // Kolmogorov-Smirnov test
535+ val ksTest = new KolmogorovSmirnovTest ()
536+ val ksP = ksTest.kolmogorovSmirnovTest(Gaussian (data.mean, data.stdDev).sample(data.length).toArray, data, true )
537+ println(s " Kolmogorov-Smirnov Test: p-value = $ksP" )
538+
539+ // Anderson-Darling test (using Apache Commons Math3)
540+ val adTest = new AndersonDarlingNormalDistributionTest ()
541+ val adP = adTest.test(data, true )
542+ println(s " Anderson-Darling Test: p-value = $adP" )
543+
544+ // Jarque-Bera test (using Apache Commons Math3)
545+ val skewness = new Skewness ().evaluate(data)
546+ val kurtosis = new Kurtosis ().evaluate(data)
547+ val jbTest = new JarqueBeraTest ()
548+ val jbP = jbTest.test(data)
549+ println(s " Jarque-Bera Test: p-value = $jbP" )
550+
551+ // Geary's Kurtosis (using MAD and Standard Deviation)
552+ val mad = StatUtils .percentile(data.map(x => Math .abs(x - StatUtils .percentile(data, 50 ))), 50 )
553+ val stdDev = Math .sqrt(StatUtils .variance(data))
554+ val gearyRatio = mad / stdDev
555+ println(s " Geary's Kurtosis: $gearyRatio" )
556+ ```
557+
558+ # Appendix: Go Code for Normality Tests
559+
560+ ``` go
561+ package main
562+
563+ import (
564+ " fmt"
565+ " math"
566+ " math/rand"
567+ " sort"
568+
569+ " gonum.org/v1/gonum/floats"
570+ " gonum.org/v1/gonum/stat"
571+ " gonum.org/v1/plot"
572+ " gonum.org/v1/plot/plotter"
573+ " gonum.org/v1/plot/plotutil"
574+ " gonum.org/v1/plot/vg"
575+ " gonum.org/v1/gonum/stat/distuv"
576+ " github.com/montanaflynn/stats"
577+ )
578+
579+ // Generate a bimodal distribution
580+ func generateBimodalDistribution (size int ) []float64 {
581+ data := make ([]float64 , size)
582+ for i := 0 ; i < size/2 ; i++ {
583+ data[i] = rand.NormFloat64 ()
584+ }
585+ for i := size / 2 ; i < size; i++ {
586+ data[i] = rand.NormFloat64 ()*0.5 + 3
587+ }
588+ return data
589+ }
590+
591+ // QQ plot function
592+ func plotQQ (data []float64 , fileName string ) {
593+ p , err := plot.New ()
594+ if err != nil {
595+ panic (err)
596+ }
597+
598+ p.Title .Text = " QQ Plot"
599+ p.X .Label .Text = " Theoretical Quantiles"
600+ p.Y .Label .Text = " Sample Quantiles"
601+
602+ norm := distuv.Normal {Mu: 0 , Sigma: 1 }
603+ quantiles := make (plotter.XYs , len (data))
604+
605+ sort.Float64s (data)
606+ for i , v := range data {
607+ quantiles[i].X = norm.Quantile (float64 (i+1 ) / float64 (len (data)+1 ))
608+ quantiles[i].Y = v
609+ }
610+
611+ plotutil.AddScatters (p, " QQ" , quantiles)
612+ if err := p.Save (4 *vg.Inch , 4 *vg.Inch , fileName); err != nil {
613+ panic (err)
614+ }
615+ }
616+
617+ // Empirical CDF vs Theoretical CDF
618+ func plotCDF (data []float64 , fileName string ) {
619+ p , err := plot.New ()
620+ if err != nil {
621+ panic (err)
622+ }
623+
624+ p.Title .Text = " Empirical CDF vs Theoretical CDF"
625+ p.X .Label .Text = " x"
626+ p.Y .Label .Text = " Cumulative Probability"
627+
628+ sort.Float64s (data)
629+ empirical := make (plotter.XYs , len (data))
630+ theoretical := make (plotter.XYs , len (data))
631+ norm := distuv.Normal {Mu: stat.Mean (data, nil ), Sigma: stat.StdDev (data, nil )}
632+
633+ for i , v := range data {
634+ empirical[i].X = v
635+ empirical[i].Y = float64 (i+1 ) / float64 (len (data))
636+ theoretical[i].X = v
637+ theoretical[i].Y = norm.CDF (v)
638+ }
639+
640+ plotutil.AddLines (p, " Empirical CDF" , empirical, " Theoretical CDF" , theoretical)
641+ if err := p.Save (4 *vg.Inch , 4 *vg.Inch , fileName); err != nil {
642+ panic (err)
643+ }
644+ }
645+
646+ // Shapiro-Wilk test (external package "github.com/montanaflynn/stats")
647+ func shapiroWilkTest (data []float64 ) float64 {
648+ w , p := stats.ShapiroWilk (data)
649+ fmt.Printf (" Shapiro-Wilk Test: W = %v , p-value = %v \n " , w, p)
650+ return p
651+ }
652+
653+ // Kolmogorov-Smirnov test
654+ func kolmogorovSmirnovTest (data []float64 ) float64 {
655+ norm := distuv.Normal {Mu: stat.Mean (data, nil ), Sigma: stat.StdDev (data, nil )}
656+ d := stat.KolmogorovSmirnov (data, norm.CDF )
657+ fmt.Printf (" Kolmogorov-Smirnov Test: D = %v \n " , d)
658+ return d
659+ }
660+
661+ // Anderson-Darling test (external package "github.com/montanaflynn/stats")
662+ func andersonDarlingTest (data []float64 ) float64 {
663+ a , _ := stats.AndersonDarling (data)
664+ fmt.Printf (" Anderson-Darling Test: A² = %v \n " , a)
665+ return a
666+ }
667+
668+ // Jarque-Bera test
669+ func jarqueBeraTest (data []float64 ) float64 {
670+ skewness := stat.Skew (data, nil )
671+ kurtosis := stat.ExKurtosis (data, nil )
672+ n := float64 (len (data))
673+ jb := n / 6.0 * (math.Pow (skewness, 2 ) + math.Pow (kurtosis, 2 )/4.0 )
674+ fmt.Printf (" Jarque-Bera Test: JB = %v \n " , jb)
675+ return jb
676+ }
677+
678+ // Geary's Kurtosis (using MAD and Standard Deviation)
679+ func gearyKurtosis (data []float64 ) float64 {
680+ median , _ := stats.Median (data)
681+ mad := floats.Sum (floats.Map (func (x float64 ) float64 { return math.Abs (x - median) }, data)) / float64 (len (data))
682+ stdDev := stat.StdDev (data, nil )
683+ geary := mad / stdDev
684+ fmt.Printf (" Geary's Kurtosis: %v \n " , geary)
685+ return geary
686+ }
687+
688+ func main () {
689+ // Generate data
690+ data := generateBimodalDistribution (1000 )
691+
692+ // Plot QQ plot
693+ plotQQ (data, " qq_plot.png" )
694+
695+ // Plot CDF plot
696+ plotCDF (data, " cdf_plot.png" )
697+
698+ // Perform Shapiro-Wilk test
699+ shapiroWilkTest (data)
700+
701+ // Perform Kolmogorov-Smirnov test
702+ kolmogorovSmirnovTest (data)
703+
704+ // Perform Anderson-Darling test
705+ andersonDarlingTest (data)
706+
707+ // Perform Jarque-Bera test
708+ jarqueBeraTest (data)
709+
710+ // Calculate Geary's Kurtosis
711+ gearyKurtosis (data)
712+ }
713+ ```
0 commit comments