Skip to content

Commit 1bbb804

Browse files
committed
bugfix in theta
1 parent 6247dc0 commit 1bbb804

File tree

8 files changed

+118
-44
lines changed

8 files changed

+118
-44
lines changed

DESCRIPTION

+2-1
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,8 @@ Suggests:
3333
ggplot2,
3434
ggforce,
3535
gridExtra,
36-
ggrepel
36+
ggrepel,
37+
MASS
3738
LinkingTo:
3839
BH (>= 1.66.0),
3940
Rcpp (>= 0.12.0),

R/dgu.R

+12-14
Original file line numberDiff line numberDiff line change
@@ -39,18 +39,15 @@ DGU <- function(ud,
3939
pars = m$pars,
4040
refresh = 50)
4141

42-
if(m$model_type=="GU") {
43-
message("Computing summaries ... \n")
44-
gu <- get_gu_summary_gu(glm = glm, hdi_lvl = hdi_lvl, ud = ud)
45-
dgu <- NA
46-
dgu_prob <- NA
47-
}
48-
if(m$model_type=="DGU") {
49-
message("Computing summaries ... \n")
50-
gu <- get_gu_summary_dgu(glm = glm, hdi_lvl = hdi_lvl, ud = ud)
51-
dgu <- get_dgu_summary(glm = glm, hdi_lvl = hdi_lvl, ud = ud)
52-
dgu_prob <- get_dgu_prob_summary(glm = glm, hdi_lvl = hdi_lvl, ud = ud)
53-
}
42+
message("Computing summaries ... \n")
43+
gu <- get_condition_prop(glm = glm, hdi_lvl = hdi_lvl, ud = ud,
44+
model_type = m$model_type)
45+
dgu <- get_dgu(glm = glm, hdi_lvl = hdi_lvl, ud = ud,
46+
model_type = m$model_type)
47+
dgu_prob <- get_dgu_prob(glm = glm, hdi_lvl = hdi_lvl, ud = ud,
48+
model_type = m$model_type)
49+
theta <- get_sample_prop_gu(glm = glm, hdi_lvl = hdi_lvl, ud = ud)
50+
5451

5552
# ppc
5653
message("Computing posterior predictions ... \n")
@@ -62,7 +59,8 @@ DGU <- function(ud,
6259
return (list(dgu = dgu,
6360
dgu_prob = dgu_prob,
6461
gu = gu,
65-
glm = glm,
62+
theta = theta,
6663
ppc = ppc,
67-
ud = ud))
64+
ud = ud,
65+
fit = glm))
6866
}

R/loo.R

+4-8
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,6 @@
11

22
# Description:
33
# LOO = leave-one-out
4-
# ud: 4 columns
5-
# * sample_id: char column
6-
# * condition: char column
7-
# * gene_name: char column
8-
# * gene_usage_count: num column
94
LOO <- function(ud,
105
mcmc_warmup = 500,
116
mcmc_steps = 1500,
@@ -28,8 +23,7 @@ LOO <- function(ud,
2823
ud <- udp$proc_ud
2924

3025
# setup control list
31-
control_list <- list(adapt_delta = adapt_delta,
32-
max_treedepth = max_treedepth)
26+
control_list <- list(adapt_delta = adapt_delta, max_treedepth = max_treedepth)
3327

3428
# unique repertoire names
3529
ud$loo_id <- ud$sample_id
@@ -74,6 +68,9 @@ LOO <- function(ud,
7468
if(is.data.frame(out$dgu_prob)==TRUE) {
7569
out$dgu_prob$loo_id <- rs[r]
7670
}
71+
if(is.data.frame(out$theta)==TRUE) {
72+
out$theta$loo_id <- rs[r]
73+
}
7774

7875
# collect results
7976
loo_out[[rs[r]]] <- out
@@ -82,4 +79,3 @@ LOO <- function(ud,
8279
return (loo_out)
8380
}
8481

85-

R/utils_dgu.R

+8-2
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,10 @@ get_contrast_map <- function(ud) {
2222
}
2323

2424

25-
get_dgu_summary <- function(glm, hdi_lvl, ud) {
25+
get_dgu <- function(glm, hdi_lvl, ud, model_type) {
26+
if(model_type == "GU") {
27+
return(NA)
28+
}
2629
contrast_map <- get_contrast_map(ud)
2730

2831
dgu_summary <- summary(object = glm,
@@ -69,7 +72,10 @@ get_dgu_summary <- function(glm, hdi_lvl, ud) {
6972
}
7073

7174

72-
get_dgu_prob_summary <- function(glm, hdi_lvl, ud) {
75+
get_dgu_prob <- function(glm, hdi_lvl, ud, model_type) {
76+
if(model_type == "GU") {
77+
return(NA)
78+
}
7379
contrast_map <- get_contrast_map(ud)
7480

7581
dgu_summary <- summary(object = glm,

R/utils_gu.R

+48-2
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,15 @@
11

2-
get_gu_summary_dgu <- function(glm, hdi_lvl, ud) {
2+
get_condition_prop <- function(glm, hdi_lvl, ud, model_type) {
3+
if(model_type == "DGU") {
4+
return(get_condition_prop_dgu(glm = glm, hdi_lvl = hdi_lvl, ud = ud))
5+
}
6+
if(model_type == "GU") {
7+
return(get_condition_prop_gu(glm = glm, hdi_lvl = hdi_lvl, ud = ud))
8+
}
9+
}
10+
11+
12+
get_condition_prop_dgu <- function(glm, hdi_lvl, ud) {
313

414
gu_summary <- summary(object = glm,
515
digits = 4,
@@ -38,7 +48,7 @@ get_gu_summary_dgu <- function(glm, hdi_lvl, ud) {
3848
}
3949

4050

41-
get_gu_summary_gu <- function(glm, hdi_lvl, ud) {
51+
get_condition_prop_gu <- function(glm, hdi_lvl, ud) {
4252

4353
gu_summary <- summary(object = glm,
4454
digits = 4,
@@ -69,3 +79,39 @@ get_gu_summary_gu <- function(glm, hdi_lvl, ud) {
6979
return(gu_summary)
7080
}
7181

82+
83+
get_sample_prop_gu <- function(glm, hdi_lvl, ud) {
84+
85+
gu <- summary(object = glm, digits = 4, pars = "theta",
86+
prob = c(0.5, (1-hdi_lvl)/2, 1-(1-hdi_lvl)/2))
87+
gu <- data.frame(gu$summary)
88+
colnames(gu) <- c("theta_mean", "theta_mean_se",
89+
"theta_sd", "theta_median",
90+
"theta_L", "theta_H",
91+
"Neff", "Rhat")
92+
gu[, c("Rhat", "Neff")] <- NULL
93+
94+
par <- rownames(gu)
95+
par <- gsub(pattern = "theta|\\[|\\]", replacement = '', x = par)
96+
par <- do.call(rbind, strsplit(x = par, split = ','))
97+
98+
gu$gene_id <- as.numeric(par[,2])
99+
gu$sample_id <- as.numeric(par[,1])
100+
101+
gu$gene_name <- ud$gene_names[gu$gene_id]
102+
gu$sample_name <- ud$sample_names[gu$sample_id]
103+
104+
m <- ud$proc_ud[, c("sample_id", "individual_id", "individual_org_name",
105+
"gene_name", "condition", "gene_usage_prop")]
106+
107+
gu <- merge(x = gu, y = m,
108+
by.x = c("sample_id", "gene_name"),
109+
by.y = c("sample_id", "gene_name"))
110+
111+
# remove unused vars
112+
gu$gene_id <- NULL
113+
gu$sample_id <- NULL
114+
rownames(gu) <- NULL
115+
return(gu)
116+
}
117+

R/utils_usage.R

+4-4
Original file line numberDiff line numberDiff line change
@@ -106,7 +106,7 @@ get_model <- function(has_replicates, has_conditions, debug = FALSE) {
106106
"sigma_individual", "sigma_replicate",
107107
"beta_sample", "beta_individual", "beta_condition",
108108
"Yhat_rep", "Yhat_rep_prop", "Yhat_condition_prop",
109-
"log_lik")
109+
"log_lik", "theta")
110110
model_name <- "GU_rep"
111111
}
112112
else {
@@ -119,7 +119,7 @@ get_model <- function(has_replicates, has_conditions, debug = FALSE) {
119119
"sigma_individual",
120120
"beta_individual", "beta_condition",
121121
"Yhat_rep", "Yhat_rep_prop", "Yhat_condition_prop",
122-
"log_lik")
122+
"log_lik", "theta")
123123
model_name <- "GU"
124124
}
125125
}
@@ -134,7 +134,7 @@ get_model <- function(has_replicates, has_conditions, debug = FALSE) {
134134
"sigma_condition", "sigma_individual", "sigma_replicate",
135135
"beta_sample", "beta_individual", "beta_condition",
136136
"Yhat_rep", "Yhat_rep_prop", "Yhat_condition_prop",
137-
"log_lik", "dgu", "dgu_prob")
137+
"log_lik", "dgu", "dgu_prob", "theta")
138138
model_name <- "DGU_rep"
139139
}
140140
else {
@@ -147,7 +147,7 @@ get_model <- function(has_replicates, has_conditions, debug = FALSE) {
147147
"sigma_condition", "sigma_individual",
148148
"beta_individual", "beta_condition",
149149
"Yhat_rep", "Yhat_rep_prop", "Yhat_condition_prop",
150-
"log_lik", "dgu", "dgu_prob")
150+
"log_lik", "dgu", "dgu_prob", "theta")
151151
model_name <- "DGU"
152152
}
153153
}

man/DGU.Rd

+2-2
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,7 @@ of HDI), H (high boundary of HDI); 2) contrast = direction of the effect; 3)
5858
pmax = probability of DGU. This summary is only available if the input data
5959
contains at least two conditions}
6060
\item{gu}{gene usage (GU) summary of each gene in each condition}
61-
\item{glm}{stanfit object}
61+
\item{fit}{stanfit object}
6262
\item{ppc}{two types of posterior predictive checks: 1) repertoire-
6363
specific, 2) condition-specific}
6464
\item{ud}{processed gene usage data used for the model}
@@ -86,7 +86,7 @@ M <- DGU(ud = d_zibb_2,
8686
max_treedepth = 10)
8787

8888
# look at DGU results
89-
head(M$glm)
89+
head(M$fit)
9090

9191
# look at posterior predictive checks (PPC)
9292
head(M$ppc)

vignettes/User_Manual.Rmd

+38-11
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ require(ggforce)
2828
require(gridExtra)
2929
require(ggrepel)
3030
require(reshape2)
31+
require(MASS)
3132
```
3233

3334

@@ -221,15 +222,15 @@ summary(M)
221222
* none found
222223

223224
```{r}
224-
rstan::check_hmc_diagnostics(M$glm)
225+
rstan::check_hmc_diagnostics(M$fit)
225226
```
226227

227228
* rhat < 1.03 and n_eff > 0
228229

229230

230231
```{r, fig.height = 3, fig.width = 6}
231-
gridExtra::grid.arrange(rstan::stan_rhat(object = M$glm),
232-
rstan::stan_ess(object = M$glm),
232+
gridExtra::grid.arrange(rstan::stan_rhat(object = M$fit),
233+
rstan::stan_ess(object = M$fit),
233234
nrow = 1)
234235
```
235236

@@ -271,7 +272,8 @@ ggplot(data = M$ppc$ppc_condition)+
271272
theme_bw(base_size = 11)+
272273
theme(legend.position = "top")+
273274
xlab(label = "Observed usage [%]")+
274-
ylab(label = "PPC usage [%]")
275+
ylab(label = "PPC usage [%]")+
276+
theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.4))
275277
```
276278

277279

@@ -288,7 +290,7 @@ For `es` we also have the mean, median standard error (se), standard
288290
deviation (sd), L (low bound of 95% HDI), H (high bound of 95% HDI)
289291

290292
```{r}
291-
kable(x = head(M$dgu), row.names = FALSE, digits = 3)
293+
kable(x = head(M$dgu), row.names = FALSE, digits = 2)
292294
```
293295

294296
### DGU: differential gene usage
@@ -352,6 +354,7 @@ ggplot()+
352354
position = position_dodge(width = 0.35), width = 0.15)+
353355
theme_bw(base_size = 11)+
354356
theme(legend.position = "top")+
357+
theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.4))+
355358
ylab(label = "PPC usage [%]")+
356359
xlab(label = '')
357360
```
@@ -371,7 +374,8 @@ ggplot()+
371374
theme_bw(base_size = 11)+
372375
theme(legend.position = "top")+
373376
ylab(label = "PPC usage [count]")+
374-
xlab(label = '')
377+
xlab(label = '')+
378+
theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.4))
375379
```
376380

377381
## GU: gene usage summary
@@ -389,7 +393,8 @@ ggplot(data = M$gu)+
389393
position = position_dodge(width = 0.4))+
390394
theme_bw(base_size = 11)+
391395
theme(legend.position = "top")+
392-
ylab(label = "GU [probability]")
396+
ylab(label = "GU [probability]")+
397+
theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.4))
393398
```
394399

395400
# Leave-one-out (LOO) analysis
@@ -439,7 +444,8 @@ ggplot(data = L_dgu)+
439444
position = position_dodge(width = 0.5))+
440445
theme_bw(base_size = 11)+
441446
theme(legend.position = "top")+
442-
ylab(expression(gamma))
447+
ylab(expression(gamma))+
448+
theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.4))
443449
```
444450

445451
## LOO-DGU: variability of $\pi$
@@ -451,13 +457,14 @@ ggplot(data = L_dgu)+
451457
position = position_dodge(width = 0.5))+
452458
theme_bw(base_size = 11)+
453459
theme(legend.position = "top")+
454-
ylab(expression(pi))
460+
ylab(expression(pi))+
461+
theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.4))
455462
```
456463

457464

458465
## LOO-GU: variability of the gene usage
459466

460-
```{r, fig.width=6.5, fig.height=4}
467+
```{r, fig.width=6, fig.height=4}
461468
ggplot(data = L_gu)+
462469
geom_hline(yintercept = 0, linetype = "dashed", col = "gray")+
463470
geom_errorbar(aes(x = gene_name, y = prob_mean, ymin = prob_L,
@@ -469,9 +476,29 @@ ggplot(data = L_gu)+
469476
position = position_dodge(width = 0.5))+
470477
theme_bw(base_size = 11)+
471478
theme(legend.position = "top")+
472-
ylab("GU [probability]")
479+
ylab("GU [probability]")+
480+
theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.4))
473481
```
474482

483+
# Multidimensional scaling (MDS) analaysis based
484+
485+
```{r, fig.width=5, fig.height=4}
486+
# x <- M$theta
487+
x <- acast(individual_id~gene_name,
488+
data = M$theta, value.var = "theta_mean")
489+
490+
nmds <- isoMDS(dist(x), k=2)
491+
492+
x <- data.frame(x = nmds$points[,1], y = nmds$points[,2], id = row.names(x))
493+
494+
ggplot(data = x)+
495+
geom_point(aes(x = x, y = y), shape = 21, size = 2)+
496+
geom_text_repel(aes(x = x, y = y, label = id),
497+
size = 3.5, min.segment.length = 0)+
498+
theme_bw()
499+
```
500+
501+
475502
# Session
476503

477504
```{r}

0 commit comments

Comments
 (0)