Skip to content

Commit 918a983

Browse files
committed
bugfixes
1 parent 8ae858c commit 918a983

File tree

6 files changed

+50
-49
lines changed

6 files changed

+50
-49
lines changed

DESCRIPTION

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
Package: IgGeneUsage
22
Type: Package
33
Title: Differential gene usage in immune repertoires
4-
Version: 1.17.23
4+
Version: 1.17.24
55
Authors@R:
66
person(given = "Simo",
77
family = "Kitanovski",

R/utils_ppc.R

+2-2
Original file line numberDiff line numberDiff line change
@@ -104,8 +104,8 @@ get_ppc_condition <- function(glm,
104104
}
105105

106106
# condition map
107-
condition_map <- data.frame(condition_name = ud$condition_names,
108-
condition_id = ud$condition_id)
107+
condition_map <- data.frame(condition_name = ud$condition_name_of_sample,
108+
condition_id = ud$condition_id_of_sample)
109109
condition_map <- condition_map[duplicated(condition_map)==FALSE,]
110110
rownames(condition_map) <- condition_map$condition_id
111111
yhat$gene_name <- ud$gene_names[yhat$gene_id]

R/utils_usage.R

+9-8
Original file line numberDiff line numberDiff line change
@@ -34,27 +34,28 @@ get_usage <- function(u) {
3434
check_paired <- function(u,
3535
has_balanced_replicates,
3636
has_replicates,
37-
has_condition) {
38-
if(has_condition==FALSE) {
37+
has_conditions) {
38+
if(has_conditions==FALSE) {
3939
return(FALSE)
4040
}
4141
if(has_balanced_replicates==FALSE) {
4242
return(FALSE)
4343
}
4444

4545
if(has_replicates) {
46-
q <- u[duplicated(u[,c("individual_id","condition","replicate")])==FALSE,]
46+
q <- u[duplicated(u[,c("individual_id","condition",
47+
"replicate_id")]) == FALSE,]
4748
q$f <- 1
48-
q <- aggregate(f~individual_id+condition+replicate, data = q,
49-
FUN = sum, simplify = FALSE, drop = FALSE)
49+
q <- aggregate(f~individual_id+condition+replicate_id,
50+
data = q, FUN = sum, drop = FALSE)
5051
q$f[is.null(q$f)|is.na(q$f)] <- 0
5152
return(ifelse(test = any(q$f!=1), yes = FALSE, no = TRUE))
5253
}
5354
else {
5455
q <- u[duplicated(u[,c("individual_id", "condition")])==FALSE,]
5556
q$f <- 1
56-
q <- aggregate(f~individual_id+condition, data = q, FUN = sum,
57-
simplify = FALSE, drop = FALSE)
57+
q <- aggregate(f~individual_id+condition,
58+
data = q, FUN = sum, drop = FALSE)
5859
q$f[is.null(q$f)|is.na(q$f)] <- 0
5960
return(ifelse(test = any(q$f!=1), yes = FALSE, no = TRUE))
6061
}
@@ -158,7 +159,7 @@ get_usage <- function(u) {
158159
u = u,
159160
has_balanced_replicates = has_balanced_replicates,
160161
has_replicates = has_replicates,
161-
has_condition = has_condition)
162+
has_conditions = has_conditions)
162163

163164
return(list(Y = Y,
164165
N = as.numeric(N),

data/d_zibb_3.RData

-15 Bytes
Binary file not shown.

inst/scripts/d_zibb_3.R

+13-6
Original file line numberDiff line numberDiff line change
@@ -21,17 +21,24 @@ data {
2121
array [N_individual] int condition_id; // id of conditions
2222
real <lower=0> phi;
2323
real <lower=0, upper=1> kappa;
24-
array [N_condition] vector [N_gene] beta_condition;
2524
vector <lower=0> [N_condition] sigma_individual;
25+
vector <lower=0> [N_condition] sigma_condition;
2626
}
2727
2828
generated quantities {
2929
vector [N_gene] alpha;
3030
array [N_individual] vector <lower=0, upper=1> [N_gene] theta;
3131
array [N_individual] vector [N_gene] beta_individual;
32+
array [N_condition] vector [N_gene] beta_condition;
3233
// generate usage
3334
array [N_gene, N_individual] int Y;
3435
36+
for(i in 1:N_condition) {
37+
for(j in 1:N_gene) {
38+
beta_condition[i][j] = normal_rng(0, sigma_condition[i]);
39+
}
40+
}
41+
3542
for(i in 1:N_gene) {
3643
alpha[i] = normal_rng(-3.0, 1.0);
3744
@@ -54,10 +61,10 @@ N_condition <- 3
5461
N_individual <- 5
5562
N_gene <- 8
5663
N <- 10^3
57-
sigma_individual <- runif(n = N_condition, min = 0.1, max = 0.6)
58-
beta_condition <- t(replicate(n = N_condition, expr = rnorm(n = N_gene, mean = 0, sd = 1)))
64+
sigma_individual <- runif(n = N_condition, min = 0.1, max = 0.2)
65+
sigma_condition <- runif(n = N_condition, min = 0.2, max = 0.6)
5966
phi <- 200
60-
kappa <- 0.03
67+
kappa <- 0.015
6168

6269
condition_id <- rep(x = 1:N_condition, each = N_individual)
6370

@@ -67,7 +74,7 @@ l <- list(N_individual = N_individual*N_condition,
6774
N = N,
6875
condition_id = condition_id,
6976
sigma_individual = sigma_individual,
70-
beta_condition = beta_condition,
77+
sigma_condition = sigma_condition,
7178
phi = phi,
7279
kappa = kappa)
7380

@@ -77,7 +84,7 @@ sim <- rstan::sampling(object = m,
7784
iter = 1,
7885
chains = 1,
7986
algorithm = "Fixed_param",
80-
seed = 123456)
87+
seed = 12346)
8188

8289

8390
# extract simulation and convert into data frame which can

vignettes/User_Manual.Rmd

+25-32
Original file line numberDiff line numberDiff line change
@@ -88,19 +88,19 @@ on the posterior distribution of $\gamma$, and are thus related.
8888
`r Biocpkg("IgGeneUsage")` has a couple of built-in Ig gene usage datasets.
8989
Some were obtained from studies and others were simulated.
9090

91-
Lets look into the simulated dataset `d_zibb_2`. This dataset was generated
91+
Let's look at the simulated dataset `d_zibb_3`. This dataset was generated
9292
by a zero-inflated beta-binomial (ZIBB) model, and `r Biocpkg("IgGeneUsage")`
9393
was designed to fit ZIBB-distributed data.
9494

9595
```{r}
96-
data("d_zibb_2", package = "IgGeneUsage")
97-
knitr::kable(head(d_zibb_2))
96+
data("d_zibb_3", package = "IgGeneUsage")
97+
knitr::kable(head(d_zibb_3))
9898
```
9999

100-
We can also visualize `d_zibb_2` with `r CRANpkg("ggplot")`:
100+
We can also visualize `d_zibb_3` with `r CRANpkg("ggplot2")`:
101101

102102
```{r, fig.width=6, fig.height=3.25}
103-
ggplot(data = d_zibb_2)+
103+
ggplot(data = d_zibb_3)+
104104
geom_point(aes(x = gene_name, y = gene_usage_count, col = condition),
105105
position = position_dodge(width = .7), shape = 21)+
106106
theme_bw(base_size = 11)+
@@ -113,10 +113,10 @@ ggplot(data = d_zibb_2)+
113113

114114
## DGU analysis
115115
As main input `r Biocpkg("IgGeneUsage")` uses a data.frame formatted as e.g.
116-
`d_zibb_2`. Other input parameters allow you to configure specific settings
116+
`d_zibb_3`. Other input parameters allow you to configure specific settings
117117
of the `r CRANpkg("rstan")` sampler.
118118

119-
In this example, we analyze `d_zibb_2` with 3 MCMC chains, 1500 iterations
119+
In this example, we analyze `d_zibb_3` with 3 MCMC chains, 1500 iterations
120120
each including 300 warm-ups using a single CPU core (Hint: for parallel
121121
chain execution set parameter `mcmc_cores` = 3). We report for each model
122122
parameter its mean and 95% highest density interval (HDI).
@@ -129,8 +129,8 @@ issue with a reproducible script at the Bioconductor support site or on
129129
Github[^3].
130130

131131
```{r}
132-
M <- DGU(ud = d_zibb_2, # input data
133-
mcmc_warmup = 500, # how many MCMC warm-ups per chain (default: 500)
132+
M <- DGU(ud = d_zibb_3, # input data
133+
mcmc_warmup = 300, # how many MCMC warm-ups per chain (default: 500)
134134
mcmc_steps = 1500, # how many MCMC steps per chain (default: 1,500)
135135
mcmc_chains = 3, # how many MCMC chain to run (default: 4)
136136
mcmc_cores = 1, # how many PC cores to use? (e.g. parallel chains)
@@ -182,7 +182,7 @@ summary(M)
182182
rstan::check_hmc_diagnostics(M$fit)
183183
```
184184

185-
* rhat < 1.03 and n_eff > 0
185+
* rhat < 1.05 and n_eff > 0
186186

187187

188188
```{r, fig.height = 3, fig.width = 6}
@@ -197,7 +197,7 @@ Error bars show 95% HDI of mean posterior prediction. The predictions can be
197197
compared with the observed data (x-axis). For points near the diagonal
198198
$\rightarrow$ accurate prediction.
199199

200-
```{r, fig.height = 3.25, fig.width = 7}
200+
```{r, fig.height = 4, fig.width = 7}
201201
ggplot(data = M$ppc$ppc_rep)+
202202
facet_wrap(facets = ~individual_id, ncol = 5)+
203203
geom_abline(intercept = 0, slope = 1, linetype = "dashed", col = "darkgray")+
@@ -366,7 +366,7 @@ by evaluating their variability for a specific gene.
366366
This analysis can be computationally demanding.
367367

368368
```{r}
369-
L <- LOO(ud = d_zibb_2, # input data
369+
L <- LOO(ud = d_zibb_3, # input data
370370
mcmc_warmup = 500, # how many MCMC warm-ups per chain (default: 500)
371371
mcmc_steps = 1000, # how many MCMC steps per chain (default: 1,500)
372372
mcmc_chains = 1, # how many MCMC chain to run (default: 4)
@@ -376,6 +376,7 @@ L <- LOO(ud = d_zibb_2, # input data
376376
max_treedepth = 10) # tree depth evaluated at each step (default: 12)
377377
```
378378

379+
379380
Next, we collect the results (GU and DGU) from each LOO iteration:
380381

381382
```{r}
@@ -388,32 +389,32 @@ L_dgu <- do.call(rbind, lapply(X = L, FUN = function(x){return(x$dgu)}))
388389

389390
## LOO-DGU: variability of effect size $\gamma$
390391

391-
```{r, fig.width=6.5, fig.height=4}
392+
```{r, fig.width=6, fig.height=5}
392393
ggplot(data = L_dgu)+
394+
facet_wrap(facets = ~contrast, ncol = 1)+
393395
geom_hline(yintercept = 0, linetype = "dashed", col = "gray")+
394396
geom_errorbar(aes(x = gene_name, y = es_mean, ymin = es_L,
395397
ymax = es_H, col = contrast, group = loo_id),
396-
width = 0.1, position = position_dodge(width = 0.5))+
398+
width = 0.1, position = position_dodge(width = 0.75))+
397399
geom_point(aes(x = gene_name, y = es_mean, col = contrast,
398400
group = loo_id), size = 1,
399-
position = position_dodge(width = 0.5))+
401+
position = position_dodge(width = 0.75))+
400402
theme_bw(base_size = 11)+
401-
theme(legend.position = "top")+
402-
ylab(expression(gamma))+
403-
theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.4))
403+
theme(legend.position = "none")+
404+
ylab(expression(gamma))
404405
```
405406

406407
## LOO-DGU: variability of $\pi$
407408

408-
```{r, fig.width=6.5, fig.height=4}
409+
```{r, fig.width=6, fig.height=5}
409410
ggplot(data = L_dgu)+
411+
facet_wrap(facets = ~contrast, ncol = 1)+
410412
geom_point(aes(x = gene_name, y = pmax, col = contrast,
411413
group = loo_id), size = 1,
412414
position = position_dodge(width = 0.5))+
413415
theme_bw(base_size = 11)+
414-
theme(legend.position = "top")+
415-
ylab(expression(pi))+
416-
theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.4))
416+
theme(legend.position = "none")+
417+
ylab(expression(pi))
417418
```
418419

419420

@@ -425,24 +426,16 @@ ggplot(data = L_gu)+
425426
geom_errorbar(aes(x = gene_name, y = prob_mean, ymin = prob_L,
426427
ymax = prob_H, col = condition,
427428
group = interaction(loo_id, condition)),
428-
width = 0.1, position = position_dodge(width = 0.5))+
429+
width = 0.1, position = position_dodge(width = 1))+
429430
geom_point(aes(x = gene_name, y = prob_mean, col = condition,
430431
group = interaction(loo_id, condition)), size = 1,
431-
position = position_dodge(width = 0.5))+
432+
position = position_dodge(width = 1))+
432433
theme_bw(base_size = 11)+
433434
theme(legend.position = "top")+
434435
ylab("GU [probability]")+
435436
theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.4))
436437
```
437438

438-
# Hierarchical clustering analaysis
439-
440-
```{r, fig.width=6, fig.height=4}
441-
# x <- M$theta
442-
x <- acast(individual_id~gene_name, data = M$theta, value.var = "theta_mean")
443-
444-
plot(hclust(dist(x, method = "euclidean"), method = "average"))
445-
```
446439

447440

448441
# Case Study B: analyzing IRRs containing biological replicates

0 commit comments

Comments
 (0)