Skip to content

Commit d19ccd9

Browse files
committed
chore: code snippets
1 parent c830051 commit d19ccd9

File tree

1 file changed

+230
-0
lines changed

1 file changed

+230
-0
lines changed

_posts/2024-10-28-understanding normality tests a deep dive into their power and limitations.md

Lines changed: 230 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,8 @@ keywords:
2020
- python
2121
- r
2222
- ruby
23+
- scala
24+
- go
2325
seo_description: An in-depth exploration of normality tests, their limitations, and the importance of visual inspection for assessing whether data follow a normal distribution.
2426
seo_title: 'Understanding Normality Tests: A Deep Dive'
2527
seo_type: article
@@ -31,6 +33,8 @@ tags:
3133
- python
3234
- r
3335
- ruby
36+
- scala
37+
- go
3438
title: 'Understanding Normality Tests: A Deep Dive into Their Power and Limitations'
3539
---
3640

@@ -481,3 +485,229 @@ sd = Math.sqrt(data.map { |x| (x - data.mean) ** 2 }.sum / data.size)
481485
geary_ratio = mad / sd
482486
puts "Geary's Kurtosis: #{geary_ratio}"
483487
```
488+
489+
# Appendix: Scala Code for Normality Tests
490+
491+
```scala
492+
// Import necessary libraries
493+
import breeze.stats.distributions._
494+
import breeze.plot._
495+
import org.apache.commons.math3.stat.descriptive._
496+
import org.apache.commons.math3.stat.inference._
497+
import org.apache.commons.math3.stat.StatUtils
498+
499+
// Generate the special bimodal distribution
500+
def generateBimodalDistribution(size: Int = 1000): Array[Double] = {
501+
val dist1 = Gaussian(0, 1).sample(size / 2)
502+
val dist2 = Gaussian(3, 0.5).sample(size / 2)
503+
dist1 ++ dist2
504+
}
505+
506+
// Generate data
507+
val data = generateBimodalDistribution()
508+
509+
// QQ plot (using Breeze)
510+
val f = Figure()
511+
val p = f.subplot(0)
512+
p += plot(Gaussian(0, 1).sample(data.length).sorted, data.sorted)
513+
p.title = "QQ Plot"
514+
f.saveas("qq_plot.png")
515+
516+
// Empirical CDF vs Theoretical CDF
517+
val empiricalCDF = data.sorted.zipWithIndex.map { case (value, index) =>
518+
(value, (index + 1).toDouble / data.length)
519+
}
520+
val theoreticalCDF = data.sorted.map(x => (x, Gaussian(data.mean, data.stdDev).cdf(x)))
521+
522+
val f2 = Figure()
523+
val p2 = f2.subplot(0)
524+
p2 += plot(empiricalCDF.map(_._1), empiricalCDF.map(_._2), name = "Empirical CDF")
525+
p2 += plot(theoreticalCDF.map(_._1), theoreticalCDF.map(_._2), name = "Theoretical CDF", style = '-')
526+
p2.title = "Empirical CDF vs Theoretical CDF"
527+
f2.saveas("cdf_plot.png")
528+
529+
// Shapiro-Wilk test (via Apache Commons Math3)
530+
val shapiroTest = new org.apache.commons.math3.stat.inference.ShapiroWilkTest()
531+
val shapiroP = shapiroTest.test(data)
532+
println(s"Shapiro-Wilk Test: p-value = $shapiroP")
533+
534+
// Kolmogorov-Smirnov test
535+
val ksTest = new KolmogorovSmirnovTest()
536+
val ksP = ksTest.kolmogorovSmirnovTest(Gaussian(data.mean, data.stdDev).sample(data.length).toArray, data, true)
537+
println(s"Kolmogorov-Smirnov Test: p-value = $ksP")
538+
539+
// Anderson-Darling test (using Apache Commons Math3)
540+
val adTest = new AndersonDarlingNormalDistributionTest()
541+
val adP = adTest.test(data, true)
542+
println(s"Anderson-Darling Test: p-value = $adP")
543+
544+
// Jarque-Bera test (using Apache Commons Math3)
545+
val skewness = new Skewness().evaluate(data)
546+
val kurtosis = new Kurtosis().evaluate(data)
547+
val jbTest = new JarqueBeraTest()
548+
val jbP = jbTest.test(data)
549+
println(s"Jarque-Bera Test: p-value = $jbP")
550+
551+
// Geary's Kurtosis (using MAD and Standard Deviation)
552+
val mad = StatUtils.percentile(data.map(x => Math.abs(x - StatUtils.percentile(data, 50))), 50)
553+
val stdDev = Math.sqrt(StatUtils.variance(data))
554+
val gearyRatio = mad / stdDev
555+
println(s"Geary's Kurtosis: $gearyRatio")
556+
```
557+
558+
# Appendix: Go Code for Normality Tests
559+
560+
```go
561+
package main
562+
563+
import (
564+
"fmt"
565+
"math"
566+
"math/rand"
567+
"sort"
568+
569+
"gonum.org/v1/gonum/floats"
570+
"gonum.org/v1/gonum/stat"
571+
"gonum.org/v1/plot"
572+
"gonum.org/v1/plot/plotter"
573+
"gonum.org/v1/plot/plotutil"
574+
"gonum.org/v1/plot/vg"
575+
"gonum.org/v1/gonum/stat/distuv"
576+
"github.com/montanaflynn/stats"
577+
)
578+
579+
// Generate a bimodal distribution
580+
func generateBimodalDistribution(size int) []float64 {
581+
data := make([]float64, size)
582+
for i := 0; i < size/2; i++ {
583+
data[i] = rand.NormFloat64()
584+
}
585+
for i := size / 2; i < size; i++ {
586+
data[i] = rand.NormFloat64()*0.5 + 3
587+
}
588+
return data
589+
}
590+
591+
// QQ plot function
592+
func plotQQ(data []float64, fileName string) {
593+
p, err := plot.New()
594+
if err != nil {
595+
panic(err)
596+
}
597+
598+
p.Title.Text = "QQ Plot"
599+
p.X.Label.Text = "Theoretical Quantiles"
600+
p.Y.Label.Text = "Sample Quantiles"
601+
602+
norm := distuv.Normal{Mu: 0, Sigma: 1}
603+
quantiles := make(plotter.XYs, len(data))
604+
605+
sort.Float64s(data)
606+
for i, v := range data {
607+
quantiles[i].X = norm.Quantile(float64(i+1) / float64(len(data)+1))
608+
quantiles[i].Y = v
609+
}
610+
611+
plotutil.AddScatters(p, "QQ", quantiles)
612+
if err := p.Save(4*vg.Inch, 4*vg.Inch, fileName); err != nil {
613+
panic(err)
614+
}
615+
}
616+
617+
// Empirical CDF vs Theoretical CDF
618+
func plotCDF(data []float64, fileName string) {
619+
p, err := plot.New()
620+
if err != nil {
621+
panic(err)
622+
}
623+
624+
p.Title.Text = "Empirical CDF vs Theoretical CDF"
625+
p.X.Label.Text = "x"
626+
p.Y.Label.Text = "Cumulative Probability"
627+
628+
sort.Float64s(data)
629+
empirical := make(plotter.XYs, len(data))
630+
theoretical := make(plotter.XYs, len(data))
631+
norm := distuv.Normal{Mu: stat.Mean(data, nil), Sigma: stat.StdDev(data, nil)}
632+
633+
for i, v := range data {
634+
empirical[i].X = v
635+
empirical[i].Y = float64(i+1) / float64(len(data))
636+
theoretical[i].X = v
637+
theoretical[i].Y = norm.CDF(v)
638+
}
639+
640+
plotutil.AddLines(p, "Empirical CDF", empirical, "Theoretical CDF", theoretical)
641+
if err := p.Save(4*vg.Inch, 4*vg.Inch, fileName); err != nil {
642+
panic(err)
643+
}
644+
}
645+
646+
// Shapiro-Wilk test (external package "github.com/montanaflynn/stats")
647+
func shapiroWilkTest(data []float64) float64 {
648+
w, p := stats.ShapiroWilk(data)
649+
fmt.Printf("Shapiro-Wilk Test: W = %v, p-value = %v\n", w, p)
650+
return p
651+
}
652+
653+
// Kolmogorov-Smirnov test
654+
func kolmogorovSmirnovTest(data []float64) float64 {
655+
norm := distuv.Normal{Mu: stat.Mean(data, nil), Sigma: stat.StdDev(data, nil)}
656+
d := stat.KolmogorovSmirnov(data, norm.CDF)
657+
fmt.Printf("Kolmogorov-Smirnov Test: D = %v\n", d)
658+
return d
659+
}
660+
661+
// Anderson-Darling test (external package "github.com/montanaflynn/stats")
662+
func andersonDarlingTest(data []float64) float64 {
663+
a, _ := stats.AndersonDarling(data)
664+
fmt.Printf("Anderson-Darling Test: A² = %v\n", a)
665+
return a
666+
}
667+
668+
// Jarque-Bera test
669+
func jarqueBeraTest(data []float64) float64 {
670+
skewness := stat.Skew(data, nil)
671+
kurtosis := stat.ExKurtosis(data, nil)
672+
n := float64(len(data))
673+
jb := n / 6.0 * (math.Pow(skewness, 2) + math.Pow(kurtosis, 2)/4.0)
674+
fmt.Printf("Jarque-Bera Test: JB = %v\n", jb)
675+
return jb
676+
}
677+
678+
// Geary's Kurtosis (using MAD and Standard Deviation)
679+
func gearyKurtosis(data []float64) float64 {
680+
median, _ := stats.Median(data)
681+
mad := floats.Sum(floats.Map(func(x float64) float64 { return math.Abs(x - median) }, data)) / float64(len(data))
682+
stdDev := stat.StdDev(data, nil)
683+
geary := mad / stdDev
684+
fmt.Printf("Geary's Kurtosis: %v\n", geary)
685+
return geary
686+
}
687+
688+
func main() {
689+
// Generate data
690+
data := generateBimodalDistribution(1000)
691+
692+
// Plot QQ plot
693+
plotQQ(data, "qq_plot.png")
694+
695+
// Plot CDF plot
696+
plotCDF(data, "cdf_plot.png")
697+
698+
// Perform Shapiro-Wilk test
699+
shapiroWilkTest(data)
700+
701+
// Perform Kolmogorov-Smirnov test
702+
kolmogorovSmirnovTest(data)
703+
704+
// Perform Anderson-Darling test
705+
andersonDarlingTest(data)
706+
707+
// Perform Jarque-Bera test
708+
jarqueBeraTest(data)
709+
710+
// Calculate Geary's Kurtosis
711+
gearyKurtosis(data)
712+
}
713+
```

0 commit comments

Comments
 (0)