-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathiid_null.stan
More file actions
115 lines (90 loc) · 3.28 KB
/
iid_null.stan
File metadata and controls
115 lines (90 loc) · 3.28 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
//Independent and Indentically Distributed Fay-Herroit Stan Model with no covariates
// The input data is a vector 'y' of length 'N'.
data{
// dimensions of the data
int<lower=0> C; //number of clusters C should be greater than 0
int<lower=0> D; //number of districts for ghana
//outcome
array[C] int<lower=0> y; //positive cases per cluster greater or equal to 0, never negative
array[C] int<lower=0> n; // children tested per cluster
//luster to district mapping
array[C] int<lower=1, upper=D> district; // which district does cluster c belong to
// aggregation
matrix[D, C] A; // D x C matrix
vector[D] district_counts; // number of cluster per districts
}
// The parameters accepted by the model. Our model
// accepts two parameters 'mu' and 'sigma'.
parameters{
//grand mean (overall mean)
real mu; //overall log-odds of prevalence in ghana
// random effects
vector[D] u; // district random effects (deviations from the grand mean IID)
vector[C] epsilon; // cluster deviations
//variance components
real<lower=0> sigma_u; // how much district vary around the grand mean
real<lower=0> sigma_v; // how much cluster vary within districts
}
// The model to be estimated. We model the output
// 'y' to be normally distributed with mean 'mu'
// and standard deviation 'sigma'.
model{
//Priors
mu ~ normal(0, 1);
//iid district effects - each district drawn from a normal N(0, 1)
// something like u_d <- rnorm(216, mean = 0, sd=1) in R
u ~ normal(0, 1);
// cluster overdisperions
epsilon ~ normal(0, 1);
// variance components
sigma_u ~ exponential(2);
sigma_v ~ exponential(2);
// linear predictors
vector[C] eta = mu + sigma_u * u[district] + sigma_v * epsilon;
//likelihood
y ~ binomial_logit(n, eta);
}
// generated quantities {
//
// vector[C] pi_c = inv_logit( # pi_c are the cluster level fitted prevelance
// mu + sigma_u * u[district] + sigma_v * epsilon
// );
//
// # vector pi_ci is the model's fitted probability of malaria for each cluster — the posterior estimate of true prevalence in cluster cc
// #c after accounting for all parameters. (smootheed estimates)
//
// vector[D] pi_d = (A * pi_c) ./ district_counts; # pi_d is a vector of district level prevelence aggregated from the above cclutser level prevalance
//
//
// real pi_ghana = inv_logit(mu);
//
// vector[C] log_lik;
// for (c in 1:C) {
// log_lik[c] = binomial_logit_lpmf(y[c] | n[c],
// mu + sigma_u * u[district[c]] + sigma_v * epsilon[c]);
// }
// }
generated quantities {
vector[C] pi_c = inv_logit(
mu + sigma_u * u[district] + sigma_v * epsilon
);
// district prevalence — only for sampled districts
vector[D] pi_d;
for (d in 1:D) {
if (district_counts[d] > 0) {
// sampled district — average over clusters
pi_d[d] = dot_product(A[d], pi_c) / district_counts[d];
} else {
// unsampled district — prior prediction only
// grand mean + district random effect, no cluster data
pi_d[d] = inv_logit(mu + sigma_u * u[d]);
}
}
real pi_ghana = inv_logit(mu);
vector[C] log_lik;
for (c in 1:C) {
log_lik[c] = binomial_logit_lpmf(y[c] | n[c],
mu + sigma_u * u[district[c]]
+ sigma_v * epsilon[c]);
}
}