Skip to content

Commit 1be05f7

Browse files
committed
update to paired design
1 parent 5bf6faf commit 1be05f7

11 files changed

+263
-39
lines changed

DESCRIPTION

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
Package: IgGeneUsage
22
Type: Package
33
Title: Differential gene usage in immune repertoires
4-
Version: 1.17.18
4+
Version: 1.17.19
55
Authors@R:
66
person(given = "Simo",
77
family = "Kitanovski",

R/dgu.R

+4-2
Original file line numberDiff line numberDiff line change
@@ -6,15 +6,17 @@ DGU <- function(ud,
66
mcmc_cores = 1,
77
hdi_lvl = 0.95,
88
adapt_delta = 0.95,
9-
max_treedepth = 12) {
9+
max_treedepth = 12,
10+
paired = FALSE) {
1011

1112
# check inputs
1213
check_dgu_input(ud = ud,
1314
mcmc_chains = as.integer(x = mcmc_chains),
1415
mcmc_cores = as.integer(x = mcmc_cores),
1516
mcmc_steps = as.integer(x = mcmc_steps),
1617
mcmc_warmup = as.integer(x = mcmc_warmup),
17-
hdi_lvl = hdi_lvl)
18+
hdi_lvl = hdi_lvl,
19+
paired = paired)
1820

1921
udr <- ud
2022
ud <- get_usage(u = udr)

R/loo.R

+6-3
Original file line numberDiff line numberDiff line change
@@ -8,15 +8,17 @@ LOO <- function(ud,
88
mcmc_cores = 1,
99
hdi_lvl = 0.95,
1010
adapt_delta = 0.95,
11-
max_treedepth = 12) {
11+
max_treedepth = 12,
12+
paired = FALSE) {
1213

1314
# check inputs
1415
check_dgu_input(ud = ud,
1516
mcmc_chains = as.integer(x = mcmc_chains),
1617
mcmc_cores = as.integer(x = mcmc_cores),
1718
mcmc_steps = as.integer(x = mcmc_steps),
1819
mcmc_warmup = as.integer(x = mcmc_warmup),
19-
hdi_lvl = hdi_lvl)
20+
hdi_lvl = hdi_lvl,
21+
paired = paired)
2022

2123
# process data
2224
udp <- get_usage(u = ud)
@@ -57,7 +59,8 @@ LOO <- function(ud,
5759
mcmc_cores = mcmc_cores,
5860
hdi_lvl = hdi_lvl,
5961
adapt_delta = adapt_delta,
60-
max_treedepth = max_treedepth)
62+
max_treedepth = max_treedepth,
63+
paired = paired)
6164

6265
if(is.data.frame(out$gu)==TRUE) {
6366
out$gu$loo_id <- rs[r]

R/utils_input.R

+15-2
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
21
# Description:
32
# Provided the input arguments, this function checks their
43
# validity. It stops the execution if a problem is encountered
@@ -8,13 +7,15 @@ check_dgu_input <- function(ud,
87
mcmc_cores,
98
mcmc_steps,
109
mcmc_warmup,
10+
paired,
1111
hdi_lvl) {
1212

1313
if(missing(ud) || is.null(ud) ||
1414
missing(mcmc_chains) || is.null(mcmc_chains) ||
1515
missing(mcmc_steps) || is.null(mcmc_steps) ||
1616
missing(mcmc_warmup) || is.null(mcmc_warmup) ||
1717
missing(mcmc_cores) || is.null(mcmc_cores) ||
18+
missing(paired) || is.null(paired) ||
1819
missing(hdi_lvl) || is.null(hdi_lvl)) {
1920
stop("arguments must be specified")
2021
}
@@ -24,10 +25,10 @@ check_dgu_input <- function(ud,
2425
mcmc_warmup = mcmc_warmup)
2526
check_mcmc_chains(mcmc_chains = mcmc_chains)
2627
check_mcmc_cores(mcmc_cores = mcmc_cores)
28+
check_paired(paired = paired)
2729
check_hdi(hdi_lvl = hdi_lvl)
2830
}
2931

30-
3132
# Description:
3233
# MCMC Iterations check
3334
check_mcmc_steps <- function(mcmc_steps,
@@ -168,3 +169,15 @@ check_ud <- function(ud) {
168169
}
169170
}
170171
}
172+
173+
# Description:
174+
# paired input check
175+
check_paired <- function(paired) {
176+
if(length(paired) != 1) {
177+
stop('paired must be a logical (TRUE or FALSE)')
178+
}
179+
180+
if(is.logical(paired) == FALSE) {
181+
stop('paired must be a logical (TRUE or FALSE)')
182+
}
183+
}

R/utils_usage.R

+29-6
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,6 @@ get_usage <- function(u) {
1111
u$replicate_id <- u$replicate
1212
u$sample_id <- paste0(u$individual_id, '|', u$condition, '|', u$replicate)
1313
u$sample_id <- as.numeric(as.factor(u$sample_id))
14-
# u$individual_id <- paste0(u$individual_id, '|', u$condition)
1514

1615
m <- u[duplicated(u$sample_id)==FALSE, c("sample_id",
1716
"individual_id",
@@ -130,15 +129,19 @@ get_usage <- function(u) {
130129
has_balanced_replicates = has_balanced_replicates))
131130
}
132131

133-
134132
# Description:
135133
# get the appropriate model
136134
get_model <- function(has_replicates,
137135
has_conditions,
138-
has_balanced_replicates) {
136+
has_balanced_replicates,
137+
paired) {
139138

140139
model_type <- ifelse(test = has_conditions, yes = "DGU", no = "GU")
141140

141+
if(paired == TRUE & has_balanced_replicates == FALSE) {
142+
stop("For paired analysis with replicates, you need balanced replicates!")
143+
}
144+
142145
if(model_type == "GU") {
143146
if(has_replicates) {
144147
model <- stanmodels$gu_rep
@@ -167,6 +170,18 @@ get_model <- function(has_replicates,
167170
"Yhat_rep", "Yhat_rep_prop", "Yhat_condition_prop",
168171
"log_lik", "dgu", "dgu_prob", "theta")
169172
model_name <- "DGU_rep"
173+
if(paired) {
174+
model <- stanmodels$dgu_paired_rep
175+
pars <- c("phi", "kappa", "alpha",
176+
"sigma_condition", "sigma_individual", "sigma_alpha",
177+
"sigma_alpha_rep", "sigma_beta_rep",
178+
"alpha_sample", "beta_sample",
179+
"alpha_individual", "beta_individual",
180+
"beta_condition",
181+
"Yhat_rep", "Yhat_rep_prop", "Yhat_condition_prop",
182+
"log_lik", "dgu", "dgu_prob", "theta")
183+
model_name <- "DGU_paired_rep"
184+
}
170185
}
171186
else {
172187
model <- stanmodels$dgu
@@ -176,6 +191,15 @@ get_model <- function(has_replicates,
176191
"Yhat_rep", "Yhat_rep_prop", "Yhat_condition_prop",
177192
"log_lik", "dgu", "dgu_prob", "theta")
178193
model_name <- "DGU"
194+
if(paired) {
195+
model <- stanmodels$dgu_paired
196+
pars <- c("phi", "kappa", "alpha",
197+
"sigma_condition", "sigma_individual",
198+
"mu_individual", "beta_individual", "beta_condition",
199+
"Yhat_rep", "Yhat_rep_prop", "Yhat_condition_prop",
200+
"log_lik", "dgu", "dgu_prob", "theta")
201+
model_name <- "DGU_paired"
202+
}
179203
}
180204
}
181205

@@ -184,11 +208,10 @@ get_model <- function(has_replicates,
184208
model_type = model_type,
185209
pars = pars,
186210
has_replicates = has_replicates,
187-
has_conditions = has_conditions))
211+
has_conditions = has_conditions,
212+
paired = paired))
188213
}
189214

190-
191-
192215
# Description:
193216
# get the appropriate model
194217
get_model_debug <- function(has_replicates,

data/d_zibb_5.RData

716 Bytes
Binary file not shown.

inst/stan/dgu_paired.stan

+143
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,143 @@
1+
functions {
2+
real zibb_lpmf(int y, int n, real mu, real phi, real kappa) {
3+
if (y == 0) {
4+
return log_sum_exp(bernoulli_lpmf(1 | kappa),
5+
bernoulli_lpmf(0 | kappa) +
6+
beta_binomial_lpmf(0 | n, mu * phi, (1 - mu) * phi));
7+
} else {
8+
return bernoulli_lpmf(0 | kappa) +
9+
beta_binomial_lpmf(y | n, mu * phi, (1 - mu) * phi);
10+
}
11+
}
12+
13+
int zibb_rng(int y, int n, real mu, real phi, real kappa) {
14+
if (bernoulli_rng(kappa) == 1) {
15+
return (0);
16+
} else {
17+
return (beta_binomial_rng(n, mu * phi, (1 - mu) * phi));
18+
}
19+
}
20+
21+
real z_rng(real a, real b, real zi) {
22+
if (bernoulli_rng(zi) == 1) {
23+
return (0);
24+
} else {
25+
return(inv_logit(a+b));
26+
}
27+
}
28+
}
29+
30+
data {
31+
int<lower=0> N_sample; // number of repertoires
32+
int<lower=0> N_gene; // gene
33+
int<lower=0> N_individual; // number of individuals
34+
int<lower=0> N_condition; // number of conditions
35+
array [N_sample] int N; // number of tries
36+
array [N_gene, N_sample] int Y; // number of heads for each coin
37+
array [N_sample] int condition_id; // id of conditions
38+
array [N_sample] int individual_id; // id of replicate
39+
}
40+
41+
transformed data {
42+
// convert int to real N for easier division in generated quantities block
43+
array [N_sample] real Nr;
44+
Nr = N;
45+
}
46+
47+
parameters {
48+
real <lower=0> phi;
49+
real <lower=0, upper=1> kappa;
50+
vector [N_gene] alpha;
51+
real <lower=0> sigma_individual;
52+
vector <lower=0> [N_condition] sigma_condition;
53+
array [N_condition] vector [N_gene] z_beta_condition;
54+
array [N_individual] vector [N_gene] z_beta_individual;
55+
vector [N_individual] mu_individual;
56+
}
57+
58+
transformed parameters {
59+
array [N_sample] vector <lower=0, upper=1> [N_gene] theta;
60+
array [N_individual] vector [N_gene] beta_individual;
61+
array [N_condition] vector [N_gene] beta_condition;
62+
63+
for(j in 1:N_condition) {
64+
beta_condition[j] = 0 + sigma_condition[j] * z_beta_condition[j];
65+
}
66+
67+
68+
for(i in 1:N_individual) {
69+
beta_individual[i] = mu_individual[i] + sigma_individual * z_beta_individual[i];
70+
}
71+
72+
for(i in 1:N_sample) {
73+
theta[i] = inv_logit(alpha + beta_individual[individual_id[i]] + beta_condition[condition_id[i]]);
74+
}
75+
}
76+
77+
model {
78+
target += beta_lpdf(kappa | 1.0, 5.0);
79+
target += exponential_lpdf(phi | 0.01);
80+
target += normal_lpdf(alpha | -3.0, 3.0);
81+
for(i in 1:N_individual) {
82+
target += normal_lpdf(mu_individual[i] | 0.0, 1.0);
83+
target += normal_lpdf(beta_individual[i] | 0.0, 1.0);
84+
target += std_normal_lpdf(z_beta_individual[i]);
85+
}
86+
for(j in 1:N_condition) {
87+
target += std_normal_lpdf(z_beta_condition[j]);
88+
}
89+
target += cauchy_lpdf(sigma_condition | 0.0, 1.0);
90+
target += cauchy_lpdf(sigma_individual | 0.0, 1.0);
91+
92+
for(i in 1:N_sample) {
93+
for(j in 1:N_gene) {
94+
target += zibb_lpmf(Y[j,i] | N[i], theta[i][j], phi, kappa);
95+
}
96+
}
97+
}
98+
99+
generated quantities {
100+
// PPC: count usage (repertoire-level)
101+
array [N_gene, N_sample] int Yhat_rep;
102+
103+
// PPC: proportion usage (repertoire-level)
104+
array [N_gene, N_sample] real Yhat_rep_prop;
105+
106+
// PPC: proportion usage at a gene level in condition
107+
array [N_condition] vector [N_gene] Yhat_condition_prop;
108+
109+
// LOG-LIK
110+
array [N_sample] vector [N_gene] log_lik;
111+
112+
// DGU matrix
113+
matrix [N_gene, N_condition*(N_condition-1)/2] dgu;
114+
matrix [N_gene, N_condition*(N_condition-1)/2] dgu_prob;
115+
int c = 1;
116+
117+
//TODO: speedup, run in C++ not big factor on performance
118+
for(j in 1:N_gene) {
119+
for(i in 1:N_sample) {
120+
Yhat_rep[j, i] = zibb_rng(Y[j, i], N[i], theta[i][j], phi, kappa);
121+
log_lik[i][j] = zibb_lpmf(Y[j, i] | N[i], theta[i][j], phi, kappa);
122+
123+
if(Nr[i] == 0.0) {
124+
Yhat_rep_prop[j, i] = 0;
125+
}
126+
else {
127+
Yhat_rep_prop[j, i] = Yhat_rep[j,i]/Nr[i];
128+
}
129+
}
130+
for(g in 1:N_condition) {
131+
Yhat_condition_prop[g][j] = z_rng(alpha[j], beta_condition[g][j], 0);
132+
}
133+
}
134+
135+
// DGU analysis
136+
for(i in 1:(N_condition-1)) {
137+
for(j in (i+1):N_condition) {
138+
dgu[,c] = beta_condition[i]-beta_condition[j];
139+
dgu_prob[,c]=to_vector(Yhat_condition_prop[i])-to_vector(Yhat_condition_prop[j]);
140+
c = c + 1;
141+
}
142+
}
143+
}

man/DGU.Rd

+6-2
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,8 @@ DGU(ud,
1818
mcmc_cores,
1919
hdi_lvl,
2020
adapt_delta,
21-
max_treedepth)
21+
max_treedepth,
22+
paired = FALSE)
2223
}
2324

2425
\arguments{
@@ -41,6 +42,8 @@ ud can also be be a SummarizedExperiment object. See examplary data
4142
\item{hdi_lvl}{Highest density interval (HDI) (default = 0.95).}
4243
\item{adapt_delta}{MCMC setting (default = 0.95).}
4344
\item{max_treedepth}{MCMC setting (default = 12).}
45+
\item{paired}{should a paired samples differential Ig gene analaysis be
46+
performed (default = FALSE)?}
4447
}
4548

4649
\details{
@@ -83,7 +86,8 @@ M <- DGU(ud = d_zibb_2,
8386
mcmc_cores = 1,
8487
hdi_lvl = 0.95,
8588
adapt_delta = 0.8,
86-
max_treedepth = 10)
89+
max_treedepth = 10,
90+
paired = FALSE)
8791

8892
# look at M elements
8993
names(M)

man/LOO.Rd

+6-2
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,8 @@ LOO(ud,
3838
mcmc_cores,
3939
hdi_lvl,
4040
adapt_delta,
41-
max_treedepth)
41+
max_treedepth,
42+
paired = FALSE)
4243
}
4344

4445

@@ -62,6 +63,8 @@ ud can also be be a SummarizedExperiment object. See dataset
6263
\item{hdi_lvl}{Highest density interval (HDI) (default = 0.95).}
6364
\item{adapt_delta}{MCMC setting (default = 0.95).}
6465
\item{max_treedepth}{MCMC setting (default = 12).}
66+
\item{paired}{should a paired samples differential Ig gene analaysis be
67+
performed (default = FALSE)?}
6568
}
6669

6770
\details{
@@ -100,7 +103,8 @@ L <- LOO(ud = Ig,
100103
mcmc_cores = 1,
101104
hdi_lvl = 0.95,
102105
adapt_delta = 0.99,
103-
max_treedepth = 10)
106+
max_treedepth = 10,
107+
paired = FALSE)
104108

105109
# how many LOOs?
106110
names(L)

0 commit comments

Comments
 (0)