mnavascues · Sep 19, 2022 · Sep 19, 2022
Showing with 59 additions and 10 deletions.

+7 −1 1.ABC.Rmd

+6 −1 2.GoodPractices.Rmd

+26 −0 3.ClassicalABC.Rmd

+5 −0 4.Beyond.Rmd

+8 −2 5.ABCRF.Rmd

+6 −6 6.ABCRFcont.R

+1 −0 README.md
diff --git a/1.ABC.Rmd b/1.ABC.Rmd
@@ -29,6 +29,12 @@ source("src/coin_toss.R", local = TRUE)
 source("src/readms.output.R", local =T)
 # functions to calculate summary statistics from ms output
 source("src/ms_sumstats.R", local =T)
+
+# load data and reference table 
+octomanati_data  <- read.ms.output(file="data/octomanati.txt")
+rinocaracol_data <- read.ms.output(file="data/rinocaracol.txt")
+target           <- readRDS(file="data/target.RDS")
+ref_table        <- readRDS(file="data/ref_table_1.RDS")
 ```
 
 ## Approximation 1: Simulation
@@ -263,7 +269,7 @@ Explore the data (you can use functions `S()`, `NH()`, `PI()`, `TajimaD()` and `
 * How many polymorphic sites are there?  How would you apply the rejection algorithm?
 
 
-```{r eval=T, echo=T}
+```{r read_data, eval=TRUE, echo=TRUE}
 octomanati_data  <- read.ms.output(file="data/octomanati.txt")
 rinocaracol_data <- read.ms.output(file="data/rinocaracol.txt")
 ```

diff --git a/2.GoodPractices.Rmd b/2.GoodPractices.Rmd
@@ -21,6 +21,11 @@ library(learnr)
 library(phyclust, quietly=T)
 # functions for ABC
 suppressMessages(library(abc, quietly=T))
+
+# load data and reference table
+target       <- readRDS(file="data/target.RDS")
+ref_table_1  <- readRDS(file="data/ref_table_1.RDS")
+ref_table_1b <- readRDS(file="data/ref_table_1b.RDS")
 ```
 
 ## Evaluation of the model and prior: Goodness of fit
@@ -67,7 +72,7 @@ points(PCA_target[,pc[1]],PCA_target[,pc[2]],col=c("yellow","orange"))
 num_of_sims <- 10000
 # Principal Component Analysis
 PCA_result  <- princomp(ref_table_1[,c("S","PI","NH","TD","FLD")])
-pc<-c(1,2) 
+pc<-c(3,4) 
 plot(PCA_result$scores[seq_len(num_of_sims),pc[1]],
      PCA_result$scores[seq_len(num_of_sims),pc[2]],
      xlab=paste("PCA",pc[1]),ylab=paste("PCA",pc[2]))

diff --git a/3.ClassicalABC.Rmd b/3.ClassicalABC.Rmd
@@ -23,6 +23,11 @@ library(learnr)
 suppressMessages(library(abc, quietly=T))
 # function for weighted histogram
 suppressMessages(library(weights, quietly=T))
+
+# load data and reference table
+target      <- readRDS(file="data/target.RDS")
+ref_table_1 <- readRDS(file="data/ref_table_1.RDS")
+ref_table_2 <- readRDS(file="data/ref_table_2.RDS")
 ```
 
 ## Regression ABC
@@ -140,6 +145,8 @@ wtd.hist(x = log10(theta_prime_accept_reg),
 
 ## Practice classical ABC
 
+
+
 ### Exercise 2
 
 Using ABC, estimate $\theta$ for *octomanati* and *rinocaracol*. Provide a measure of the uncertainty of the estimate:
@@ -294,11 +301,30 @@ model_choice_rinocaracol <- postpr(target=target["rinocaracol",],
                                   sumstat=sumstats,
                                   tol=0.1,
                                   method="mnlogistic")
+# Calculate Bayes Factor (with prior probability 0.5 for each model)
+octomanati_K  <- model_choice_octomanati$pred[2]/model_choice_octomanati$pred[1]
+rinocaracol_K <- model_choice_rinocaracol$pred[2]/model_choice_rinocaracol$pred[1]
 
 cat("Model posterior probabilities for octomanati:"); model_choice_octomanati$pred
+cat("Corresponding Bayes factor is:"); octomanati_K
+
 cat("Model posterior probabilities for rinocaracol:"); model_choice_rinocaracol$pred
+cat("Corresponding Bayes factor is:"); rinocaracol_K
 ```
 
+In Bayesian statistics the support of one model over the alternative is quantified using the [Bayes factor (K)](https://en.wikipedia.org/wiki/Bayes_factor). The strength of the support is often evaluated following Jeffrey's scale of interpretation:
+
+K    | Strength of evidence for Model 1
+-----|--------------------------------
+$<10^0$ | Support for the Model 2
+$10^0$ to $10^{0.5}$ | Weak
+$10^{0.5}$ to $10^{1}$ | Substantial
+$10^1$ to $10^{1.5}$ | Strong
+$10^{1.5}$ to $10^2$ | Very strong
+$>10^2$ | Decisive
+
+
+
 #   
 
 #### References
diff --git a/4.Beyond.Rmd b/4.Beyond.Rmd
@@ -21,6 +21,11 @@ library(learnr)
 suppressMessages(library(abc, quietly=T))
 # function for weighted histogram
 suppressMessages(library(weights, quietly=T))
+
+# load data and reference table
+target      <- readRDS(file="data/target.RDS")
+ref_table_1 <- readRDS(file="data/ref_table_1.RDS")
+ref_table_2 <- readRDS(file="data/ref_table_2.RDS")
 ```
 
 ## Latent variables

diff --git a/5.ABCRF.Rmd b/5.ABCRF.Rmd
@@ -24,6 +24,11 @@ library(learnr)
 library(abcrf)
 # function for weighted histogram
 suppressMessages(library(weights, quietly=T))
+
+# load data and reference table
+target      <- readRDS(file="data/target.RDS")
+ref_table_1 <- readRDS(file="data/ref_table_1.RDS")
+ref_table_2 <- readRDS(file="data/ref_table_2.RDS")
 ```
 
 ## Limitations of classical ABC
@@ -54,7 +59,7 @@ ref_table_2 <- readRDS(file="data/ref_table_2.RDS")
 ```{r model_choice, exercise=TRUE, exercise.lines=30}
 num_of_sims <- 10000
 
-model <- c(rep("constant population size",num_of_sims),rep("population size change",num_of_sims))
+model <- as.factor(c(rep("constant",num_of_sims),rep("change",num_of_sims)))
 sumstats <- rbind(ref_table_1[seq_len(num_of_sims),c("S","PI","NH","TD","FLD")],
                   ref_table_2[seq_len(num_of_sims),c("S","PI","NH","TD","FLD")])
 ref_table <-cbind(model,sumstats)
@@ -84,7 +89,8 @@ model_selection_result_RF <- predict(object= model_RF,
 Estimate parameter $\theta$ from the model of constant population size for *octomanati* using the code provided (it uses function `regAbcrf()`).
 
 * Increase the number of trees if necessary.
-* Are the estimates very different from the ones obtained with classical regression ABC (unit 3)? * The values of $\theta$ from the whole reference table are used to plot the histogram of its posterior probability distribution, why?
+* Are the estimates very different from the ones obtained with classical regression ABC (unit 3)?
+* The values of $\theta$ from the whole reference table are used to plot the histogram of its posterior probability distribution, why?
 
 ```{r eval=T, echo=T}
 target      <- readRDS(file="data/target.RDS")

diff --git a/6.ABCRFcont.R b/6.ABCRFcont.R
@@ -43,7 +43,7 @@ partition.tree(regression_tree,
 ################################
 
 num_of_sims <- 10000
-model <- c(rep("constant population size",num_of_sims),rep("population size change",num_of_sims))
+model <- as.factor(c(rep("constant",num_of_sims),rep("change",num_of_sims)))
 sumstats <- rbind(ref_table_1[seq_len(num_of_sims),c("S","PI","NH","TD","FLD")],
                   ref_table_2[seq_len(num_of_sims),c("S","PI","NH","TD","FLD")])
 ref_table <-cbind(model,sumstats)
@@ -53,13 +53,13 @@ classification_tree <- tree(model ~ TD + FLD, data=ref_table)
 plot(classification_tree)
 text(classification_tree,cex=0.75)
 
-plot(ref_table$TD[which(model=="population size change")],
-     ref_table$FLD[which(model=="population size change")],
+plot(ref_table$TD[which(model=="change")],
+     ref_table$FLD[which(model=="change")],
      xlab="Tajima's D",
      ylab="Fu and Li's D",
      pch=20)
-points(ref_table$TD[which(model=="constant population size")],
-       ref_table$FLD[which(model=="constant population size")],
+points(ref_table$TD[which(model=="constant")],
+       ref_table$FLD[which(model=="constant")],
        col="grey",pch=20)
 partition.tree(classification_tree,
                ordvars=c("TD","FLD"),
@@ -104,7 +104,7 @@ for (i in 1:100){
 
 num_of_sims <- 10000
 
-model <- c(rep("constant",num_of_sims),rep("size change",num_of_sims))
+model <- as.factor(c(rep("constant",num_of_sims),rep("change",num_of_sims)))
 sumstats <- rbind(ref_table_1[seq_len(num_of_sims),c("S","PI","NH","TD","FLD")],
                   ref_table_2[seq_len(num_of_sims),c("S","PI","NH","TD","FLD")])
 ref_table <-cbind(model,sumstats)

diff --git a/README.md b/README.md
@@ -37,6 +37,7 @@ File | Unit
 
 ### todo
 
+* add abc stub
 * add references on abcrf
 * Version in Python of some materials using msprime as simulator, tskit for calculating summary statistics and ranger for abcrf, maybe in a jupyter notebook