Skip to content

Commit 451f61a

Browse files
committed
new models committed
1 parent 898ffac commit 451f61a

File tree

3 files changed

+354
-0
lines changed

3 files changed

+354
-0
lines changed

inst/stan/dgu_paired.stan

+149
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,149 @@
1+
functions {
2+
// Log-probability mass of a zero-inflated beta-binomial (ZIBB) count.
//   y      observed count
//   n      number of trials
//   mu     mean success probability of the beta-binomial component
//   phi    scale turning mu into the shapes (mu * phi, (1 - mu) * phi)
//   kappa  probability that the observation is an inflated zero
real zibb_lpmf(int y, int n, real mu, real phi, real kappa) {
  real shape_a = mu * phi;
  real shape_b = (1 - mu) * phi;
  // log-mass contributed by the beta-binomial (non-inflated) component
  real lp_count = bernoulli_lpmf(0 | kappa)
                  + beta_binomial_lpmf(y | n, shape_a, shape_b);

  if (y != 0) {
    // a positive count can only come from the beta-binomial component
    return lp_count;
  }
  // a zero is either an inflated zero or a beta-binomial zero
  return log_sum_exp(bernoulli_lpmf(1 | kappa), lp_count);
}
12+
13+
// Draws one count from the zero-inflated beta-binomial: first a Bernoulli(kappa)
// draw decides whether the result is an inflated zero, otherwise a
// beta-binomial draw with shapes (mu * phi, (1 - mu) * phi) is returned.
// The argument y is part of the signature but is not used by the draw.
int zibb_rng(int y, int n, real mu, real phi, real kappa) {
  int inflated = bernoulli_rng(kappa);
  if (inflated == 1) {
    return 0;
  }
  return beta_binomial_rng(n, mu * phi, (1 - mu) * phi);
}
20+
21+
// Returns 0 with probability zi, otherwise the probability inv_logit(a + b).
// A Bernoulli(zi) draw is always made, even when zi is 0.
real z_rng(real a, real b, real zi) {
  int zeroed = bernoulli_rng(zi);
  if (zeroed == 1) {
    return 0;
  }
  return inv_logit(a + b);
}
28+
}
29+
30+
// Observed counts plus the index vectors that map columns of Y to
// individuals and conditions.
data {
31+
int<lower=0> N_sample; // number of repertoires (length of individual_id)
32+
int<lower=0> N_gene; // number of genes (rows of Y)
33+
int<lower=0> N_individual; // number of individuals
34+
int<lower=0> N_condition; // number of conditions
35+
array [N_individual] int N; // number of trials for each column of Y (n of the beta-binomial)
36+
array [N_gene, N_individual] int Y; // observed count of gene j in column i (y of the ZIBB likelihood)
37+
array [N_individual] int condition_id; // condition index of each individual; selects beta_condition / sigma_individual
38+
array [N_sample] int individual_id; // individual index of each sample; selects alpha_individual / beta_individual
39+
}
40+
41+
transformed data {
  // real-valued copy of N so that the division in generated quantities
  // is a real division rather than an integer one
  array [N_individual] real Nr;
  for (k in 1:N_individual) {
    Nr[k] = N[k];
  }
}
46+
47+
// Sampled parameters. The z_* arrays are raw offsets that are shifted and
// scaled into the actual effects in the transformed parameters block.
parameters {
48+
real <lower=0> phi; // scale of the beta-binomial shapes (mu * phi, (1 - mu) * phi)
49+
real <lower=0, upper=1> kappa; // zero-inflation probability
50+
51+
vector [N_gene] alpha; // gene-level intercept on the logit scale
52+
53+
vector <lower=0> [N_condition] sigma_condition; // scale of the condition effects, one per condition
54+
vector <lower=0> [N_condition] sigma_individual; // scale of individual effects around their condition effect
55+
real <lower=0> sigma_alpha; // scale of individual intercepts around alpha
56+
57+
array [N_individual] vector [N_gene] z_alpha_individual; // raw offsets for alpha_individual
58+
array [N_individual] vector [N_gene] z_beta_individual; // raw offsets for beta_individual
59+
array [N_condition] vector [N_gene] z_beta_condition; // raw offsets for beta_condition
60+
}
61+
62+
transformed parameters {
  // success probability handed to the ZIBB likelihood, per column and gene
  array [N_individual] vector <lower=0, upper=1> [N_gene] theta;
  // individual-level intercepts and effects (logit scale)
  array [N_individual] vector [N_gene] alpha_individual;
  array [N_individual] vector [N_gene] beta_individual;
  // condition-level effects (logit scale)
  array [N_condition] vector [N_gene] beta_condition;

  // condition effects are centred on 0 and scaled by sigma_condition
  for (k in 1:N_condition) {
    beta_condition[k] = 0 + sigma_condition[k] * z_beta_condition[k];
  }

  // individual terms: shift/scale the raw offsets around the gene intercept
  // and around the effect of the individual's condition
  for (p in 1:N_individual) {
    int cond = condition_id[p];
    alpha_individual[p] = alpha + sigma_alpha * z_alpha_individual[p];
    beta_individual[p] = beta_condition[cond] + sigma_individual[cond] * z_beta_individual[p];
  }

  // NOTE(review): theta is declared with N_individual entries but is filled
  // for 1:N_sample; this only lines up when N_sample == N_individual - confirm.
  for (s in 1:N_sample) {
    int ind = individual_id[s];
    theta[s] = inv_logit(alpha_individual[ind] + beta_individual[ind]);
  }
}
81+
82+
model {
  // priors: zero-inflation probability, beta-binomial scale, gene intercepts
  target += beta_lpdf(kappa | 1.0, 5.0);
  target += exponential_lpdf(phi | 0.01);
  target += normal_lpdf(alpha | -3.0, 3.0);

  // standard-normal priors on the raw offsets of the non-centred effects
  for (i in 1:N_condition) {
    target += std_normal_lpdf(z_beta_condition[i]);
  }
  for (i in 1:N_individual) {
    // z_alpha_individual only enters the model through
    // sigma_alpha * z_alpha_individual, so it needs a proper prior as well;
    // without one the scale and the offsets are not identified
    target += std_normal_lpdf(z_alpha_individual[i]);
    target += std_normal_lpdf(z_beta_individual[i]);
  }

  // half-Cauchy priors on the scales (the parameters are declared lower=0)
  target += cauchy_lpdf(sigma_individual | 0.0, 1.0);
  target += cauchy_lpdf(sigma_condition | 0.0, 1.0);
  target += cauchy_lpdf(sigma_alpha | 0.0, 1.0);

  // likelihood: zero-inflated beta-binomial for every gene count
  for (i in 1:N_individual) {
    for (j in 1:N_gene) {
      target += zibb_lpmf(Y[j,i] | N[i], theta[i][j], phi, kappa);
    }
  }
}
104+
105+
generated quantities {
  // PPC: simulated counts per gene and column of Y
  array [N_gene, N_individual] int Yhat_rep;

  // PPC: the simulated counts as a proportion of N
  array [N_gene, N_individual] real Yhat_rep_prop;

  // PPC: gene usage probability per condition
  array [N_condition] vector [N_gene] Yhat_condition_prop;

  // pointwise log-likelihood
  array [N_individual] vector [N_gene] log_lik;

  // DGU contrasts: one column per condition pair (i, j) with i < j
  matrix [N_gene, N_condition*(N_condition-1)/2] dgu;
  matrix [N_gene, N_condition*(N_condition-1)/2] dgu_prob;
  // next column of dgu / dgu_prob to fill
  int c = 1;

  // TODO: could be sped up by moving to C++; not a major performance factor
  for (gene in 1:N_gene) {
    for (unit in 1:N_individual) {
      Yhat_rep[gene, unit] = zibb_rng(Y[gene, unit], N[unit], theta[unit][gene], phi, kappa);
      log_lik[unit][gene] = zibb_lpmf(Y[gene, unit] | N[unit], theta[unit][gene], phi, kappa);

      // report a proportion of 0 when there were no trials (avoids 0/0)
      Yhat_rep_prop[gene, unit] = (Nr[unit] == 0.0) ? 0.0 : Yhat_rep[gene, unit] / Nr[unit];
    }
    for (grp in 1:N_condition) {
      // zi = 0: the condition-level probability is never zeroed out
      Yhat_condition_prop[grp][gene] = z_rng(alpha[gene], beta_condition[grp][gene], 0);
    }
  }

  // DGU analysis: pairwise differences between conditions, on the logit
  // scale (dgu) and on the probability scale (dgu_prob)
  for (first in 1:(N_condition-1)) {
    for (second in (first+1):N_condition) {
      dgu[, c] = beta_condition[first] - beta_condition[second];
      dgu_prob[, c] = Yhat_condition_prop[first] - Yhat_condition_prop[second];
      c += 1;
    }
  }
}

inst/stan/dgu_paired_rep.stan

+165
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,165 @@
1+
functions {
2+
// Log-probability mass of a zero-inflated beta-binomial (ZIBB) count.
//   y      observed count
//   n      number of trials
//   mu     mean success probability of the beta-binomial component
//   phi    scale turning mu into the shapes (mu * phi, (1 - mu) * phi)
//   kappa  probability that the observation is an inflated zero
real zibb_lpmf(int y, int n, real mu, real phi, real kappa) {
  real shape_a = mu * phi;
  real shape_b = (1 - mu) * phi;
  // log-mass contributed by the beta-binomial (non-inflated) component
  real lp_count = bernoulli_lpmf(0 | kappa)
                  + beta_binomial_lpmf(y | n, shape_a, shape_b);

  if (y != 0) {
    // a positive count can only come from the beta-binomial component
    return lp_count;
  }
  // a zero is either an inflated zero or a beta-binomial zero
  return log_sum_exp(bernoulli_lpmf(1 | kappa), lp_count);
}
12+
13+
// Draws one count from the zero-inflated beta-binomial: first a Bernoulli(kappa)
// draw decides whether the result is an inflated zero, otherwise a
// beta-binomial draw with shapes (mu * phi, (1 - mu) * phi) is returned.
// The argument y is part of the signature but is not used by the draw.
int zibb_rng(int y, int n, real mu, real phi, real kappa) {
  int inflated = bernoulli_rng(kappa);
  if (inflated == 1) {
    return 0;
  }
  return beta_binomial_rng(n, mu * phi, (1 - mu) * phi);
}
20+
21+
// Returns 0 with probability zi, otherwise the probability inv_logit(a + b).
// A Bernoulli(zi) draw is always made, even when zi is 0.
real z_rng(real a, real b, real zi) {
  int zeroed = bernoulli_rng(zi);
  if (zeroed == 1) {
    return 0;
  }
  return inv_logit(a + b);
}
28+
}
29+
30+
// Observed counts plus the index vectors that map samples to individuals,
// replicates and conditions.
data {
31+
int<lower=0> N_sample; // number of repertoires (length of individual_id and replicate_id)
32+
int<lower=0> N_gene; // number of genes (rows of Y)
33+
int<lower=0> N_individual; // number of individuals
34+
int<lower=0> N_condition; // number of conditions
35+
int<lower=0> N_replicate; // number of replicates
36+
array [N_individual] int N; // number of trials for each column of Y (n of the beta-binomial)
37+
array [N_gene, N_individual] int Y; // observed count of gene j in column i (y of the ZIBB likelihood)
38+
array [N_individual] int condition_id; // condition index of each individual; selects beta_condition / sigma_individual
39+
array [N_sample] int individual_id; // individual index of each sample
40+
array [N_sample] int replicate_id; // replicate index of each sample
41+
}
42+
43+
transformed data {
  // real-valued copy of N so that the division in generated quantities
  // is a real division rather than an integer one
  array [N_individual] real Nr;
  for (k in 1:N_individual) {
    Nr[k] = N[k];
  }
}
48+
49+
// Sampled parameters. The z_* arrays are raw offsets that are shifted and
// scaled into the actual effects in the transformed parameters block.
parameters {
50+
real <lower=0> phi; // scale of the beta-binomial shapes (mu * phi, (1 - mu) * phi)
51+
real <lower=0, upper=1> kappa; // zero-inflation probability
52+
53+
vector [N_gene] alpha; // gene-level intercept on the logit scale
54+
55+
vector <lower=0> [N_condition] sigma_condition; // scale of the condition effects, one per condition
56+
vector <lower=0> [N_condition] sigma_individual; // scale of individual effects around their condition effect
57+
real <lower=0> sigma_alpha; // scale of individual intercepts around alpha
58+
real <lower=0> sigma_alpha_rep; // scale of replicate intercepts around alpha_individual
59+
real <lower=0> sigma_beta_rep; // scale of replicate effects around beta_individual
60+
61+
array [N_individual] vector [N_gene] z_alpha_individual; // raw offsets for alpha_individual
62+
array [N_individual] vector [N_gene] z_beta_individual; // raw offsets for beta_individual
63+
array [N_condition] vector [N_gene] z_beta_condition; // raw offsets for beta_condition
64+
array [N_individual, N_replicate] vector [N_gene] z_alpha_sample; // raw offsets for alpha_sample
65+
array [N_individual, N_replicate] vector [N_gene] z_beta_sample; // raw offsets for beta_sample
66+
}
67+
68+
transformed parameters {
  // condition-level effects (logit scale)
  array [N_condition] vector [N_gene] beta_condition;
  // individual-level intercepts and effects (logit scale)
  array [N_individual] vector [N_gene] alpha_individual;
  array [N_individual] vector [N_gene] beta_individual;
  // replicate-level intercepts and effects, indexed [individual, replicate]
  array [N_individual, N_replicate] vector [N_gene] alpha_sample;
  array [N_individual, N_replicate] vector [N_gene] beta_sample;
  // success probability handed to the ZIBB likelihood
  array [N_individual] vector <lower=0, upper=1> [N_gene] theta;

  // condition effects are centred on 0 and scaled by sigma_condition
  for (k in 1:N_condition) {
    beta_condition[k] = 0 + sigma_condition[k] * z_beta_condition[k];
  }

  // individual terms: shift/scale the raw offsets around the gene intercept
  // and around the effect of the individual's condition
  for (p in 1:N_individual) {
    int cond = condition_id[p];
    alpha_individual[p] = alpha + sigma_alpha * z_alpha_individual[p];
    beta_individual[p] = beta_condition[cond] + sigma_individual[cond] * z_beta_individual[p];
  }

  // replicate terms vary around the terms of the individual they belong to.
  // NOTE(review): only the [individual, replicate] cells named by a sample are
  // assigned here; any other cell of alpha_sample / beta_sample is left unset.
  // NOTE(review): theta is declared with N_individual entries but is filled
  // for 1:N_sample; this only lines up when N_sample == N_individual - confirm.
  for (s in 1:N_sample) {
    int ind = individual_id[s];
    int rpl = replicate_id[s];
    alpha_sample[ind, rpl] = alpha_individual[ind] + sigma_alpha_rep * z_alpha_sample[ind, rpl];
    beta_sample[ind, rpl] = beta_individual[ind] + sigma_beta_rep * z_beta_sample[ind, rpl];
    theta[s] = inv_logit(alpha_sample[ind, rpl] + beta_sample[ind, rpl]);
  }
}
91+
92+
model {
  // priors: zero-inflation probability, beta-binomial scale, gene intercepts
  target += beta_lpdf(kappa | 1.0, 5.0);
  target += exponential_lpdf(phi | 0.01);
  target += normal_lpdf(alpha | -3.0, 3.0);

  // standard-normal priors on the raw offsets of the non-centred effects
  for (i in 1:N_condition) {
    target += std_normal_lpdf(z_beta_condition[i]);
  }
  for (i in 1:N_individual) {
    // z_alpha_individual only enters the model through
    // sigma_alpha * z_alpha_individual, so it needs a proper prior as well;
    // without one the scale and the offsets are not identified
    target += std_normal_lpdf(z_alpha_individual[i]);
    target += std_normal_lpdf(z_beta_individual[i]);
  }
  // NOTE(review): only the [individual, replicate] cells named by a sample
  // receive a prior here - confirm that every cell is used by some sample.
  for (i in 1:N_sample) {
    target += std_normal_lpdf(z_alpha_sample[individual_id[i], replicate_id[i]]);
    target += std_normal_lpdf(z_beta_sample[individual_id[i], replicate_id[i]]);
  }

  // half-Cauchy priors on the scales (the parameters are declared lower=0)
  target += cauchy_lpdf(sigma_individual | 0.0, 1.0);
  target += cauchy_lpdf(sigma_condition | 0.0, 1.0);
  target += cauchy_lpdf(sigma_alpha | 0.0, 1.0);
  target += cauchy_lpdf(sigma_alpha_rep | 0.0, 1.0);
  target += cauchy_lpdf(sigma_beta_rep | 0.0, 1.0);

  // likelihood: zero-inflated beta-binomial for every gene count
  for (i in 1:N_individual) {
    for (j in 1:N_gene) {
      target += zibb_lpmf(Y[j,i] | N[i], theta[i][j], phi, kappa);
    }
  }
}
120+
121+
generated quantities {
  // PPC: simulated counts per gene and column of Y
  array [N_gene, N_individual] int Yhat_rep;

  // PPC: the simulated counts as a proportion of N
  array [N_gene, N_individual] real Yhat_rep_prop;

  // PPC: gene usage probability per condition
  array [N_condition] vector [N_gene] Yhat_condition_prop;

  // pointwise log-likelihood
  array [N_individual] vector [N_gene] log_lik;

  // DGU contrasts: one column per condition pair (i, j) with i < j
  matrix [N_gene, N_condition*(N_condition-1)/2] dgu;
  matrix [N_gene, N_condition*(N_condition-1)/2] dgu_prob;
  // next column of dgu / dgu_prob to fill
  int c = 1;

  // TODO: could be sped up by moving to C++; not a major performance factor
  for (gene in 1:N_gene) {
    for (unit in 1:N_individual) {
      Yhat_rep[gene, unit] = zibb_rng(Y[gene, unit], N[unit], theta[unit][gene], phi, kappa);
      log_lik[unit][gene] = zibb_lpmf(Y[gene, unit] | N[unit], theta[unit][gene], phi, kappa);

      // report a proportion of 0 when there were no trials (avoids 0/0)
      Yhat_rep_prop[gene, unit] = (Nr[unit] == 0.0) ? 0.0 : Yhat_rep[gene, unit] / Nr[unit];
    }
    for (grp in 1:N_condition) {
      // zi = 0: the condition-level probability is never zeroed out
      Yhat_condition_prop[grp][gene] = z_rng(alpha[gene], beta_condition[grp][gene], 0);
    }
  }

  // DGU analysis: pairwise differences between conditions, on the logit
  // scale (dgu) and on the probability scale (dgu_prob)
  for (first in 1:(N_condition-1)) {
    for (second in (first+1):N_condition) {
      dgu[, c] = beta_condition[first] - beta_condition[second];
      dgu_prob[, c] = Yhat_condition_prop[first] - Yhat_condition_prop[second];
      c += 1;
    }
  }
}

man/d_zibb_4.Rd

+40
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
\name{d_zibb_4}
2+
\alias{d_zibb_4}
3+
\docType{data}
4+
\title{Simulated Ig gene usage data}
5+
6+
\description{
7+
A small example dataset that has the following features:
8+
9+
\itemize{
10+
\item 3 conditions
11+
\item 3 samples per condition
12+
\item 3 replicates per sample
13+
\item 15 Ig genes
14+
}
15+
This dataset was simulated from a zero-inflated beta-binomial (ZIBB)
16+
distribution. Simulation code is available in inst/scripts/d_zibb_4.R
17+
}
18+
19+
\usage{
20+
data("d_zibb_4", package = "IgGeneUsage")
21+
}
22+
23+
\format{
24+
A data frame with 4 columns:
25+
\itemize{
26+
\item "individual_id"
27+
\item "condition"
28+
\item "gene_name"
29+
\item "gene_name_count"
30+
}
31+
This format is accepted by IgGeneUsage.
32+
}
33+
\source{
34+
Simulation code is provided in inst/scripts/d_zibb_4.R
35+
}
36+
\examples{
37+
data("d_zibb_4", package = "IgGeneUsage")
38+
head(d_zibb_4)
39+
}
40+
\keyword{datasets}

0 commit comments

Comments
 (0)