Skip to content

Commit 309f546

Browse files
committed
update of case study 5
1 parent d43cb09 commit 309f546

File tree

5 files changed

+253
-23
lines changed

5 files changed

+253
-23
lines changed

data/d_zibb_5.RData

643 Bytes
Binary file not shown.

inst/scripts/d_zibb_5.R

+149
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,149 @@
1+
require(rstan)
2+
3+
# Stan generative model
4+
sim_stan <- "
5+
functions {
6+
int zibb_rng(int y, int n, real mu, real phi, real kappa) {
7+
if (bernoulli_rng(kappa) == 1) {
8+
return (0);
9+
} else {
10+
return (beta_binomial_rng(n, mu * phi, (1 - mu) * phi));
11+
}
12+
}
13+
}
14+
15+
data {
16+
int<lower=0> N_sample; // number of repertoires
17+
int<lower=0> N_gene; // gene
18+
int<lower=0> N_individual; // number of individuals
19+
int<lower=0> N_condition; // number of conditions
20+
array [N_sample] int N; // repertoire size
21+
array [N_sample] int condition_id; // id of conditions
22+
array [N_sample] int individual_id; // id of replicate
23+
real <lower=0> phi;
24+
real <lower=0, upper=1> kappa;
25+
array [N_condition] vector [N_gene] beta_condition;
26+
vector <lower=0> [N_condition] sigma_condition;
27+
real <lower=0> sigma_alpha;
28+
}
29+
30+
generated quantities {
31+
vector [N_gene] alpha;
32+
array [N_individual] vector [N_gene] alpha_individual;
33+
array [N_sample] vector [N_gene] beta_individual;
34+
// generate usage
35+
array [N_sample] vector <lower=0, upper=1> [N_gene] theta;
36+
array [N_gene, N_sample] int Y;
37+
38+
for(i in 1:N_gene) {
39+
alpha[i] = normal_rng(-3, 0.5);
40+
}
41+
42+
for(i in 1:N_sample) {
43+
for(j in 1:N_gene) {
44+
45+
alpha_individual[individual_id[i]][j] = normal_rng(alpha[j], sigma_alpha);
46+
47+
beta_individual[i][j] = normal_rng(beta_condition[condition_id[i]][j], sigma_condition[condition_id[i]]);
48+
49+
theta[i][j] = inv_logit(alpha_individual[individual_id[i]][j] + beta_individual[i][j]);
50+
Y[j, i] = zibb_rng(Y[j, i], N[i], theta[i][j], phi, kappa);
51+
}
52+
}
53+
}
54+
"
55+
56+
# compile model
57+
model <- rstan::stan_model(model_code = sim_stan)
58+
59+
60+
61+
# generate data based on the following parameters parameters
62+
set.seed(11132)
63+
N_gene <- 8
64+
N_individual <- 10
65+
N_condition <- 2
66+
N_sample <- N_individual*N_condition
67+
68+
condition_id <- rep(x = 1:N_condition, each = N_individual)
69+
70+
N <- rep(x = 1000, times = N_sample)
71+
72+
individual_id <- rep(x = 1:N_individual, times = N_condition)
73+
74+
phi <- 200
75+
kappa <- 0.02
76+
77+
beta_condition <- array(data = 0, dim = c(N_condition, N_gene))
78+
for(c in 1:N_condition) {
79+
for(g in 1:N_gene) {
80+
u <- runif(n = 1, min = 0, max = 1)
81+
if(u <= 0.8) {
82+
beta_condition[c,g] <- rnorm(n = 1, mean = 0, sd = 0.1)
83+
} else {
84+
beta_condition[c,g] <- rnorm(n = 1, mean = 0, sd = 2)
85+
}
86+
}
87+
}
88+
89+
sigma_condition <- rep(x = 0.5, times = N_condition)
90+
sigma_alpha <- 0.25
91+
92+
93+
l <- list(N_sample = N_sample,
94+
N_gene = N_gene,
95+
N_individual = N_individual,
96+
N_condition = N_condition,
97+
N = N,
98+
condition_id = condition_id,
99+
individual_id = individual_id,
100+
phi = phi,
101+
kappa = kappa,
102+
beta_condition = beta_condition,
103+
sigma_condition = sigma_condition,
104+
sigma_alpha = sigma_alpha)
105+
106+
# simulate
107+
sim <- rstan::sampling(object = model,
108+
data = l,
109+
iter = 1,
110+
chains = 1,
111+
algorithm="Fixed_param")
112+
113+
# extract simulation and convert into data frame which can
114+
# be used as input of IgGeneUsage
115+
ysim <- rstan::extract(object = sim, par = "Y")$Y
116+
ysim <- ysim[1,,]
117+
118+
ysim_df <- reshape2::melt(ysim)
119+
colnames(ysim_df) <- c("gene_name", "sample_id", "gene_usage_count")
120+
121+
m <- data.frame(sample_id = 1:l$N_sample,
122+
individual_id = l$individual_id,
123+
condition_id = l$condition_id)
124+
125+
ysim_df <- merge(x = ysim_df, y = m, by = "sample_id", all.x = T)
126+
127+
ysim_df$condition <- paste0("C_", ysim_df$condition_id)
128+
ysim_df$gene_name <- paste0("G_", ysim_df$gene_name)
129+
ysim_df$individual_id <- paste0("I_", ysim_df$individual_id)
130+
131+
ysim_df$condition_id <- NULL
132+
ysim_df$sample_id <- NULL
133+
ysim_df <- ysim_df[, c("individual_id", "condition", "gene_name",
134+
"gene_usage_count")]
135+
136+
d_zibb_5 <- ysim_df
137+
138+
# save
139+
save(d_zibb_5, file = "data/d_zibb_5.RData", compress = T)
140+
141+
142+
# save(sim, file = "mytests/sim_d_zibb_4.RData", compress = T)
143+
ggplot(data = d_zibb_5)+
144+
facet_wrap(facets = ~gene_name, scales = "free_y")+
145+
geom_point(aes(x = condition, y = gene_usage_count, group = individual_id))+
146+
geom_line(aes(x = condition, y = gene_usage_count, group = individual_id))+
147+
theme_bw(base_size = 10)+
148+
theme(legend.position = "none")
149+

inst/stan/dgu_paired.stan

+1-1
Original file line numberDiff line numberDiff line change
@@ -72,7 +72,7 @@ transformed parameters {
7272
for(i in 1:N_individual) {
7373
alpha_individual[i] = alpha + sigma_alpha * z_alpha_individual[i];
7474
for(j in 1:N_condition) {
75-
beta_individual[i,j] = beta_condition[j] + sigma_individual[j] * z_beta_individual[i,j];
75+
beta_individual[i,j] = beta_condition[j] + sigma_individual[j] * z_beta_individual[i,j];
7676
}
7777
}
7878

man/d_zibb_5.Rd

+39
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
\name{d_zibb_5}
2+
\alias{d_zibb_5}
3+
\docType{data}
4+
\title{Simulated Ig gene usage data}
5+
6+
\description{
7+
A small example of paired-sample IRRs with these features:
8+
9+
\itemize{
10+
\item 2 conditions
11+
\item 10 individuals with one IRRs per condition
12+
\item 8 Ig genes
13+
}
14+
This dataset was simulated from zero-inflated beta-binomial (ZIBB)
15+
distribution. Simulation code is available in inst/scripts/d_zibb_5.R
16+
}
17+
18+
\usage{
19+
data("d_zibb_5", package = "IgGeneUsage")
20+
}
21+
22+
\format{
23+
A data frame with 4 columns:
24+
\itemize{
25+
\item "individual_id"
26+
\item "condition"
27+
\item "gene_name"
28+
\item "gene_name_count"
29+
}
30+
This format is accepted by IgGeneUsage.
31+
}
32+
\source{
33+
Simulation code is provided in inst/scripts/d_zibb_5.R
34+
}
35+
\examples{
36+
data("d_zibb_5", package = "IgGeneUsage")
37+
head(d_zibb_5)
38+
}
39+
\keyword{d_zibb_5}

vignettes/User_Manual.Rmd

+64-22
Original file line numberDiff line numberDiff line change
@@ -548,24 +548,13 @@ g2 <- ggplot(data = stats)+
548548
# Case Study C: analyzing paired-IRRs
549549

550550
```{r}
551-
require(IgGeneUsage)
552-
require(rstan)
553-
require(knitr)
554-
require(ggplot2)
555-
require(ggforce)
556-
require(ggrepel)
557-
require(reshape2)
558-
require(patchwork)
559-
```
560-
561-
```{r}
562-
data("d_zibb_3", package = "IgGeneUsage")
551+
data("d_zibb_5", package = "IgGeneUsage")
563552
```
564553

565554

566-
```{r, fig.width=6, fig.height=6}
567-
ggplot(data = d_zibb_3)+
568-
facet_wrap(facets = ~gene_name)+
555+
```{r, fig.width=6, fig.height=4}
556+
ggplot(data = d_zibb_5)+
557+
facet_wrap(facets = ~gene_name, ncol = 4, scales = "free_y")+
569558
geom_line(aes(x = condition, y = gene_usage_count, group = individual_id))+
570559
geom_point(aes(x = condition, y = gene_usage_count, col = condition))+
571560
theme_bw(base_size = 11)+
@@ -575,12 +564,11 @@ ggplot(data = d_zibb_3)+
575564
theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.4))
576565
```
577566

578-
579567
```{r}
580-
M <- DGU(ud = d_zibb_3, # input data
568+
M <- DGU(ud = d_zibb_5, # input data
581569
mcmc_warmup = 500, # how many MCMC warm-ups per chain (default: 500)
582570
mcmc_steps = 1500, # how many MCMC steps per chain (default: 1,500)
583-
mcmc_chains = 3, # how many MCMC chain to run (default: 4)
571+
mcmc_chains = 1, # how many MCMC chain to run (default: 4)
584572
mcmc_cores = 3, # how many PC cores to use? (e.g. parallel chains)
585573
hdi_lvl = 0.95, # highest density interval level (de fault: 0.95)
586574
adapt_delta = 0.8, # MCMC target acceptance rate (default: 0.95)
@@ -590,16 +578,15 @@ M <- DGU(ud = d_zibb_3, # input data
590578

591579

592580
```{r, fig.height = 3, fig.width = 6}
593-
rstan::stan_rhat(object = M$fit)|
594-
rstan::stan_ess(object = M$fit)
581+
rstan::stan_rhat(object = M$fit)|rstan::stan_ess(object = M$fit)
595582
```
596583

597584

598585
```{r, fig.weight = 7, fig.height = 4}
599586
g1 <- ggplot(data = M$gu)+
600587
geom_errorbar(aes(x = gene_name, y = prob_mean, ymin = prob_L,
601588
ymax = prob_H, col = condition),
602-
width = 0.1, position = position_dodge(width = 0.4))+
589+
width = 0.01, position = position_dodge(width = 0.4))+
603590
geom_point(aes(x = gene_name, y = prob_mean, col = condition), size = 1,
604591
position = position_dodge(width = 0.4))+
605592
theme_bw(base_size = 11)+
@@ -616,7 +603,7 @@ g2 <- ggplot(data = stats)+
616603
facet_wrap(facets = ~contrast)+
617604
geom_hline(yintercept = 0, linetype = "dashed", col = "gray")+
618605
geom_errorbar(aes(x = pmax, y = es_mean, ymin = es_L, ymax = es_H),
619-
col = "darkgray")+
606+
col = "darkgray", width = 0.01)+
620607
geom_point(aes(x = pmax, y = es_mean, col = contrast))+
621608
geom_text_repel(data = stats[stats$pmax >= 0.9, ],
622609
aes(x = pmax, y = es_mean, label = gene_fac),
@@ -635,6 +622,61 @@ g2 <- ggplot(data = stats)+
635622

636623

637624

625+
<!-- ```{r} -->
626+
<!-- Mu <- DGU(ud = d_zibb_5, # input data -->
627+
<!-- mcmc_warmup = 500, # how many MCMC warm-ups per chain (default: 500) -->
628+
<!-- mcmc_steps = 1500, # how many MCMC steps per chain (default: 1,500) -->
629+
<!-- mcmc_chains = 3, # how many MCMC chain to run (default: 4) -->
630+
<!-- mcmc_cores = 3, # how many PC cores to use? (e.g. parallel chains) -->
631+
<!-- hdi_lvl = 0.95, # highest density interval level (de fault: 0.95) -->
632+
<!-- adapt_delta = 0.8, # MCMC target acceptance rate (default: 0.95) -->
633+
<!-- max_treedepth = 10, # tree depth evaluated at each step (default: 12) -->
634+
<!-- paired = FALSE) -->
635+
<!-- ``` -->
636+
637+
638+
639+
<!-- ```{r, fig.weight = 7, fig.height = 4} -->
640+
<!-- g1 <- ggplot(data = Mu$gu)+ -->
641+
<!-- geom_errorbar(aes(x = gene_name, y = prob_mean, ymin = prob_L, -->
642+
<!-- ymax = prob_H, col = condition), -->
643+
<!-- width = 0.01, position = position_dodge(width = 0.4))+ -->
644+
<!-- geom_point(aes(x = gene_name, y = prob_mean, col = condition), size = 1, -->
645+
<!-- position = position_dodge(width = 0.4))+ -->
646+
<!-- theme_bw(base_size = 11)+ -->
647+
<!-- theme(legend.position = "top")+ -->
648+
<!-- ylab(label = "GU [probability]")+ -->
649+
<!-- theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.4)) -->
650+
651+
652+
<!-- stats <- Mu$dgu -->
653+
<!-- stats <- stats[order(abs(stats$es_mean), decreasing = FALSE), ] -->
654+
<!-- stats$gene_fac <- factor(x = stats$gene_name, levels = unique(stats$gene_name)) -->
655+
656+
<!-- g2 <- ggplot(data = stats)+ -->
657+
<!-- facet_wrap(facets = ~contrast)+ -->
658+
<!-- geom_hline(yintercept = 0, linetype = "dashed", col = "gray")+ -->
659+
<!-- geom_errorbar(aes(x = pmax, y = es_mean, ymin = es_L, ymax = es_H), -->
660+
<!-- col = "darkgray", width = 0.01)+ -->
661+
<!-- geom_point(aes(x = pmax, y = es_mean, col = contrast))+ -->
662+
<!-- geom_text_repel(data = stats[stats$pmax >= 0.9, ], -->
663+
<!-- aes(x = pmax, y = es_mean, label = gene_fac), -->
664+
<!-- min.segment.length = 0, size = 2.75)+ -->
665+
<!-- theme_bw(base_size = 11)+ -->
666+
<!-- theme(legend.position = "top")+ -->
667+
<!-- xlab(label = expression(pi))+ -->
668+
<!-- xlim(c(0, 1))+ -->
669+
<!-- ylab(expression(gamma)) -->
670+
<!-- ``` -->
671+
672+
673+
<!-- ```{r, fig.height = 6, fig.width = 7} -->
674+
<!-- (g1/g2) -->
675+
<!-- ``` -->
676+
677+
678+
679+
638680
# Session
639681

640682
```{r}

0 commit comments

Comments
 (0)