Skip to content

Commit d43cb09

Browse files
committed
update bugfix in paired analysis
1 parent 936fed9 commit d43cb09

8 files changed

+172
-97
lines changed

DESCRIPTION

-1
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,6 @@ Suggests:
3232
testthat (>= 2.1.0),
3333
ggplot2,
3434
ggforce,
35-
gridExtra,
3635
ggrepel,
3736
patchwork
3837
LinkingTo:

R/utils_usage.R

+18-9
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ get_usage <- function(u) {
1111
u$replicate_id <- u$replicate
1212
u$sample_id <- paste0(u$individual_id, '|', u$condition, '|', u$replicate)
1313
u$sample_id <- as.numeric(as.factor(u$sample_id))
14-
u$individual_id <- paste0(u$individual_id, '|', u$condition)
14+
# u$individual_id <- paste0(u$individual_id, '|', u$condition)
1515

1616
m <- u[duplicated(u$sample_id)==FALSE, c("sample_id",
1717
"individual_id",
@@ -34,7 +34,7 @@ get_usage <- function(u) {
3434

3535
u$sample_id <- paste0(u$individual_id, '|', u$condition)
3636
u$sample_id <- as.numeric(as.factor(u$sample_id))
37-
u$individual_id <- paste0(u$individual_id, '|', u$condition)
37+
# u$individual_id <- paste0(u$individual_id, '|', u$condition)
3838

3939
m <- u[duplicated(u$sample_id)==FALSE, c("sample_id",
4040
"individual_id",
@@ -90,12 +90,22 @@ get_usage <- function(u) {
9090
tr <- table(replicate_ids)
9191
has_balanced_replicates <- ifelse(test = all(tr==tr[1]), yes=TRUE, no=FALSE)
9292

93-
condition_names <- character(length = max(individual_ids))
94-
for(i in 1:max(individual_ids)) {
95-
q <- individual_names[individual_ids==i][1]
96-
condition_names[i] <- m$condition[m$individual_id == q][1]
93+
if(has_replicates) {
94+
# condition at individual
95+
condition_names <- character(length = max(individual_ids))
96+
for(i in 1:max(individual_ids)) {
97+
q <- individual_names[individual_ids==i][1]
98+
condition_names[i] <- m$condition[m$individual_id == q][1]
99+
}
100+
condition_ids <- as.numeric(as.factor(condition_names))
101+
} else {
102+
# condition at sample
103+
condition_names <- character(length = length(sample_ids))
104+
for(i in 1:length(sample_ids)) {
105+
condition_names[i] <- m$condition[m$sample_id == sample_ids[i]]
106+
}
107+
condition_ids <- as.numeric(as.factor(condition_names))
97108
}
98-
condition_ids <- as.numeric(as.factor(condition_names))
99109

100110
return(list(Y = Y,
101111
N = as.numeric(N),
@@ -168,8 +178,7 @@ get_model <- function(has_replicates,
168178
"log_lik", "theta")
169179
model_name <- "GU"
170180
}
171-
}
172-
else {
181+
} else {
173182
if(has_replicates) {
174183
model <- stanmodels$dgu_rep
175184
pars <- c("phi", "kappa", "alpha",

inst/stan/dgu.stan

+15-16
Original file line numberDiff line numberDiff line change
@@ -28,21 +28,20 @@ functions {
2828
}
2929

3030
data {
31-
// in this model: N_sample = N_individual (no replicates)
3231
int<lower=0> N_sample; // number of repertoires
3332
int<lower=0> N_gene; // gene
3433
int<lower=0> N_individual; // number of individuals
3534
int<lower=0> N_condition; // number of conditions
36-
array [N_individual] int N; // number of tries (repertoire size)
37-
array [N_gene, N_individual] int Y; // number of heads for each coin
38-
array [N_individual] int condition_id; // id of conditions
39-
//array [N_individual] int individual_id; // id of replicate
35+
array [N_sample] int N; // number of tries (repertoire size)
36+
array [N_gene, N_sample] int Y; // number of heads for each coin
37+
array [N_sample] int condition_id; // id of conditions
38+
array [N_sample] int individual_id; // id of replicate
4039
}
4140

4241
transformed data {
4342
// convert int N -> real N fo convenient division
4443
// in generated quantities block
45-
array [N_individual] real Nr;
44+
array [N_sample] real Nr;
4645
Nr = N;
4746
}
4847

@@ -55,20 +54,20 @@ parameters {
5554
vector <lower=0> [N_condition] sigma_condition;
5655
vector <lower=0> [N_condition] sigma_individual;
5756

58-
array [N_individual] vector [N_gene] z_beta_individual;
57+
array [N_sample] vector [N_gene] z_beta_individual;
5958
array [N_condition] vector [N_gene] z_beta_condition;
6059
}
6160

6261
transformed parameters {
63-
array [N_individual] vector <lower=0, upper=1> [N_gene] theta;
64-
array [N_individual] vector [N_gene] beta_individual;
62+
array [N_sample] vector <lower=0, upper=1> [N_gene] theta;
63+
array [N_sample] vector [N_gene] beta_individual;
6564
array [N_condition] vector [N_gene] beta_condition;
6665

6766
for(i in 1:N_condition) {
6867
beta_condition[i] = 0 + sigma_condition[i] * z_beta_condition[i];
6968
}
7069

71-
for(i in 1:N_individual) {
70+
for(i in 1:N_sample) {
7271
beta_individual[i] = beta_condition[condition_id[i]] + sigma_individual[condition_id[i]] * z_beta_individual[i];
7372
theta[i] = inv_logit(alpha + beta_individual[i]);
7473
}
@@ -82,14 +81,14 @@ model {
8281
for(i in 1:N_condition) {
8382
target += std_normal_lpdf(z_beta_condition[i]);
8483
}
85-
for(i in 1:N_individual) {
84+
for(i in 1:N_sample) {
8685
target += std_normal_lpdf(z_beta_individual[i]);
8786
}
8887

8988
target += cauchy_lpdf(sigma_individual | 0.0, 1.0);
9089
target += cauchy_lpdf(sigma_condition | 0.0, 1.0);
9190

92-
for(i in 1:N_individual) {
91+
for(i in 1:N_sample) {
9392
for(j in 1:N_gene) {
9493
target += zibb_lpmf(Y[j,i] | N[i], theta[i][j], phi, kappa);
9594
}
@@ -98,16 +97,16 @@ model {
9897

9998
generated quantities {
10099
// PPC: count usage (repertoire-level)
101-
array [N_gene, N_individual] int Yhat_rep;
100+
array [N_gene, N_sample] int Yhat_rep;
102101

103102
// PPC: proportion usage (repertoire-level)
104-
array [N_gene, N_individual] real Yhat_rep_prop;
103+
array [N_gene, N_sample] real Yhat_rep_prop;
105104

106105
// PPC: proportion usage at a gene level in condition
107106
array [N_condition] vector [N_gene] Yhat_condition_prop;
108107

109108
// LOG-LIK
110-
array [N_individual] vector [N_gene] log_lik;
109+
array [N_sample] vector [N_gene] log_lik;
111110

112111
// DGU matrix
113112
matrix [N_gene, N_condition*(N_condition-1)/2] dgu;
@@ -116,7 +115,7 @@ generated quantities {
116115

117116
//TODO: speedup, run in C++ not big factor on performance
118117
for(j in 1:N_gene) {
119-
for(i in 1:N_individual) {
118+
for(i in 1:N_sample) {
120119
Yhat_rep[j, i] = zibb_rng(Y[j, i], N[i], theta[i][j], phi, kappa);
121120
log_lik[i][j] = zibb_lpmf(Y[j, i] | N[i], theta[i][j], phi, kappa);
122121

inst/stan/dgu_paired.stan

+24-19
Original file line numberDiff line numberDiff line change
@@ -32,15 +32,15 @@ data {
3232
int<lower=0> N_gene; // gene
3333
int<lower=0> N_individual; // number of individuals
3434
int<lower=0> N_condition; // number of conditions
35-
array [N_individual] int N; // number of tries
36-
array [N_gene, N_individual] int Y; // number of heads for each coin
37-
array [N_individual] int condition_id; // id of conditions
35+
array [N_sample] int N; // number of tries
36+
array [N_gene, N_sample] int Y; // number of heads for each coin
37+
array [N_sample] int condition_id; // id of conditions
3838
array [N_sample] int individual_id; // id of replicate
3939
}
4040

4141
transformed data {
4242
// convert int to real N for easier division in generated quantities block
43-
array [N_individual] real Nr;
43+
array [N_sample] real Nr;
4444
Nr = N;
4545
}
4646

@@ -55,27 +55,29 @@ parameters {
5555
real <lower=0> sigma_alpha;
5656

5757
array [N_individual] vector [N_gene] z_alpha_individual;
58-
array [N_individual] vector [N_gene] z_beta_individual;
58+
array [N_individual, N_condition] vector [N_gene] z_beta_individual;
5959
array [N_condition] vector [N_gene] z_beta_condition;
6060
}
6161

6262
transformed parameters {
63-
array [N_individual] vector <lower=0, upper=1> [N_gene] theta;
63+
array [N_sample] vector <lower=0, upper=1> [N_gene] theta;
6464
array [N_individual] vector [N_gene] alpha_individual;
65-
array [N_individual] vector [N_gene] beta_individual;
65+
array [N_individual, N_condition] vector [N_gene] beta_individual;
6666
array [N_condition] vector [N_gene] beta_condition;
6767

68-
for(i in 1:N_condition) {
69-
beta_condition[i] = 0 + sigma_condition[i] * z_beta_condition[i];
68+
for(j in 1:N_condition) {
69+
beta_condition[j] = 0 + sigma_condition[j] * z_beta_condition[j];
7070
}
7171

7272
for(i in 1:N_individual) {
7373
alpha_individual[i] = alpha + sigma_alpha * z_alpha_individual[i];
74-
beta_individual[i] = beta_condition[condition_id[i]] + sigma_individual[condition_id[i]] * z_beta_individual[i];
74+
for(j in 1:N_condition) {
75+
beta_individual[i,j] = beta_condition[j] + sigma_individual[j] * z_beta_individual[i,j];
76+
}
7577
}
7678

7779
for(i in 1:N_sample) {
78-
theta[i] = inv_logit(alpha_individual[individual_id[i]] + beta_individual[individual_id[i]]);
80+
theta[i] = inv_logit(alpha_individual[individual_id[i]] + beta_individual[individual_id[i], condition_id[i]]);
7981
}
8082
}
8183

@@ -84,18 +86,21 @@ model {
8486
target += exponential_lpdf(phi | 0.01);
8587
target += normal_lpdf(alpha | -3.0, 3.0);
8688

87-
for(i in 1:N_condition) {
88-
target += std_normal_lpdf(z_beta_condition[i]);
89+
for(j in 1:N_condition) {
90+
target += std_normal_lpdf(z_beta_condition[j]);
91+
for(i in 1:N_individual) {
92+
target += std_normal_lpdf(z_beta_individual[i,j]);
93+
}
8994
}
9095
for(i in 1:N_individual) {
91-
target += std_normal_lpdf(z_beta_individual[i]);
96+
target += std_normal_lpdf(z_alpha_individual[i]);
9297
}
9398

9499
target += cauchy_lpdf(sigma_individual | 0.0, 1.0);
95100
target += cauchy_lpdf(sigma_condition | 0.0, 1.0);
96101
target += cauchy_lpdf(sigma_alpha | 0.0, 1.0);
97102

98-
for(i in 1:N_individual) {
103+
for(i in 1:N_sample) {
99104
for(j in 1:N_gene) {
100105
target += zibb_lpmf(Y[j,i] | N[i], theta[i][j], phi, kappa);
101106
}
@@ -104,16 +109,16 @@ model {
104109

105110
generated quantities {
106111
// PPC: count usage (repertoire-level)
107-
array [N_gene, N_individual] int Yhat_rep;
112+
array [N_gene, N_sample] int Yhat_rep;
108113

109114
// PPC: proportion usage (repertoire-level)
110-
array [N_gene, N_individual] real Yhat_rep_prop;
115+
array [N_gene, N_sample] real Yhat_rep_prop;
111116

112117
// PPC: proportion usage at a gene level in condition
113118
array [N_condition] vector [N_gene] Yhat_condition_prop;
114119

115120
// LOG-LIK
116-
array [N_individual] vector [N_gene] log_lik;
121+
array [N_sample] vector [N_gene] log_lik;
117122

118123
// DGU matrix
119124
matrix [N_gene, N_condition*(N_condition-1)/2] dgu;
@@ -122,7 +127,7 @@ generated quantities {
122127

123128
//TODO: speedup, run in C++ not big factor on performance
124129
for(j in 1:N_gene) {
125-
for(i in 1:N_individual) {
130+
for(i in 1:N_sample) {
126131
Yhat_rep[j, i] = zibb_rng(Y[j, i], N[i], theta[i][j], phi, kappa);
127132
log_lik[i][j] = zibb_lpmf(Y[j, i] | N[i], theta[i][j], phi, kappa);
128133

inst/stan/dgu_paired_rep.stan

+9-9
Original file line numberDiff line numberDiff line change
@@ -33,16 +33,16 @@ data {
3333
int<lower=0> N_individual; // number of individuals
3434
int<lower=0> N_condition; // number of conditions
3535
int<lower=0> N_replicate; // number of replicates
36-
array [N_individual] int N; // number of tries
37-
array [N_gene, N_individual] int Y; // number of heads for each coin
36+
array [N_sample] int N; // number of tries
37+
array [N_gene, N_sample] int Y; // number of heads for each coin
3838
array [N_individual] int condition_id; // id of conditions
3939
array [N_sample] int individual_id; // id of individual
4040
array [N_sample] int replicate_id; // id of replicate
4141
}
4242

4343
transformed data {
4444
// convert int to real N for easier division in generated quantities block
45-
array [N_individual] real Nr;
45+
array [N_sample] real Nr;
4646
Nr = N;
4747
}
4848

@@ -71,7 +71,7 @@ transformed parameters {
7171
array [N_individual] vector [N_gene] beta_individual;
7272
array [N_individual, N_replicate] vector [N_gene] alpha_sample;
7373
array [N_individual, N_replicate] vector [N_gene] beta_sample;
74-
array [N_individual] vector <lower=0, upper=1> [N_gene] theta;
74+
array [N_sample] vector <lower=0, upper=1> [N_gene] theta;
7575

7676
for(i in 1:N_condition) {
7777
beta_condition[i] = 0 + sigma_condition[i] * z_beta_condition[i];
@@ -111,7 +111,7 @@ model {
111111
target += cauchy_lpdf(sigma_alpha_rep | 0.0, 1.0);
112112
target += cauchy_lpdf(sigma_beta_rep | 0.0, 1.0);
113113

114-
for(i in 1:N_individual) {
114+
for(i in 1:N_sample) {
115115
for(j in 1:N_gene) {
116116
target += zibb_lpmf(Y[j,i] | N[i], theta[i][j], phi, kappa);
117117
}
@@ -120,16 +120,16 @@ model {
120120

121121
generated quantities {
122122
// PPC: count usage (repertoire-level)
123-
array [N_gene, N_individual] int Yhat_rep;
123+
array [N_gene, N_sample] int Yhat_rep;
124124

125125
// PPC: proportion usage (repertoire-level)
126-
array [N_gene, N_individual] real Yhat_rep_prop;
126+
array [N_gene, N_sample] real Yhat_rep_prop;
127127

128128
// PPC: proportion usage at a gene level in condition
129129
array [N_condition] vector [N_gene] Yhat_condition_prop;
130130

131131
// LOG-LIK
132-
array [N_individual] vector [N_gene] log_lik;
132+
array [N_sample] vector [N_gene] log_lik;
133133

134134
// DGU matrix
135135
matrix [N_gene, N_condition*(N_condition-1)/2] dgu;
@@ -138,7 +138,7 @@ generated quantities {
138138

139139
//TODO: speedup, run in C++ not big factor on performance
140140
for(j in 1:N_gene) {
141-
for(i in 1:N_individual) {
141+
for(i in 1:N_sample) {
142142
Yhat_rep[j, i] = zibb_rng(Y[j, i], N[i], theta[i][j], phi, kappa);
143143
log_lik[i][j] = zibb_lpmf(Y[j, i] | N[i], theta[i][j], phi, kappa);
144144

0 commit comments

Comments
 (0)