forked from stan-dev/math
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathnormal_id_glm_lpdf.hpp
212 lines (195 loc) · 8.13 KB
/
normal_id_glm_lpdf.hpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
#ifndef STAN_MATH_PRIM_PROB_NORMAL_ID_GLM_LPDF_HPP
#define STAN_MATH_PRIM_PROB_NORMAL_ID_GLM_LPDF_HPP
#include <stan/math/prim/meta.hpp>
#include <stan/math/prim/err.hpp>
#include <stan/math/prim/fun/constants.hpp>
#include <stan/math/prim/fun/log.hpp>
#include <stan/math/prim/fun/size.hpp>
#include <stan/math/prim/fun/size_zero.hpp>
#include <stan/math/prim/fun/sum.hpp>
#include <stan/math/prim/fun/to_ref.hpp>
#include <stan/math/prim/fun/value_of_rec.hpp>
#include <cmath>
namespace stan {
namespace math {
/** \ingroup multivar_dists
* Returns the log PDF of the Generalized Linear Model (GLM)
* with Normal distribution and id link function.
* If containers are supplied, returns the log sum of the probabilities.
* The idea is that normal_id_glm_lpdf(y, x, alpha, beta, sigma) should
* compute a more efficient version of normal_lpdf(y, alpha + x * beta, sigma)
* by using analytically simplified gradients.
*
* @tparam T_y type of vector of dependent variables (labels);
* @tparam T_x_scalar type of a scalar in the matrix of independent variables
* (features)
* @tparam T_x_rows compile-time number of rows of `x`. It can be either
* `Eigen::Dynamic` or 1.
* @tparam T_alpha type of the intercept(s);
* this can be a vector (of the same length as y) of intercepts or a single
* value (for models with constant intercept);
* @tparam T_beta type of the weight vector;
* this can also be a single value;
* @tparam T_scale type of the (positive) scale(s);
* this can be a vector (of the same length as y, for heteroskedasticity)
* or a scalar.
*
* @param y scalar or vector of dependent variables. If it is a scalar it will
* be broadcast - used for all instances.
* @param x design matrix or row vector. If it is a row vector it will be
* broadcast - used for all instances.
* @param alpha intercept (in log odds)
* @param beta weight vector
* @param sigma (Sequence of) scale parameters for the normal
* distribution.
* @return log probability or log sum of probabilities
* @throw std::domain_error if x, beta or alpha is infinite.
* @throw std::domain_error if the scale is not positive.
* @throw std::invalid_argument if container sizes mismatch.
*/
template <bool propto, typename T_y, typename T_x_scalar, int T_x_rows,
typename T_alpha, typename T_beta, typename T_scale>
return_type_t<T_y, T_x_scalar, T_alpha, T_beta, T_scale> normal_id_glm_lpdf(
const T_y &y, const Eigen::Matrix<T_x_scalar, T_x_rows, Eigen::Dynamic> &x,
const T_alpha &alpha, const T_beta &beta, const T_scale &sigma) {
using Eigen::Array;
using Eigen::Dynamic;
using Eigen::Matrix;
using Eigen::VectorXd;
using T_partials_return
= partials_return_t<T_y, T_x_scalar, T_alpha, T_beta, T_scale>;
using T_scale_val = typename std::conditional_t<
is_vector<T_scale>::value,
Eigen::Array<partials_return_t<T_scale>, -1, 1>,
partials_return_t<T_scale>>;
using T_y_scaled_tmp =
typename std::conditional_t<T_x_rows == 1, T_partials_return,
Array<T_partials_return, Dynamic, 1>>;
const size_t N_instances = T_x_rows == 1 ? stan::math::size(y) : x.rows();
const size_t N_attributes = x.cols();
static const char *function = "normal_id_glm_lpdf";
check_consistent_size(function, "Vector of dependent variables", y,
N_instances);
check_consistent_size(function, "Weight vector", beta, N_attributes);
check_consistent_size(function, "Vector of scale parameters", sigma,
N_instances);
check_consistent_size(function, "Vector of intercepts", alpha, N_instances);
check_positive_finite(function, "Scale vector", sigma);
if (size_zero(y, sigma)) {
return 0;
}
if (!include_summand<propto, T_y, T_x_scalar, T_alpha, T_beta,
T_scale>::value) {
return 0;
}
const auto &x_val = to_ref(value_of_rec(x));
const auto &beta_val = value_of_rec(beta);
const auto &alpha_val = value_of_rec(alpha);
const auto &sigma_val = value_of_rec(sigma);
const auto &y_val = value_of_rec(y);
const auto &beta_val_vec = to_ref(as_column_vector_or_scalar(beta_val));
const auto &alpha_val_vec = as_column_vector_or_scalar(alpha_val);
const auto &sigma_val_vec = to_ref(as_column_vector_or_scalar(sigma_val));
const auto &y_val_vec = as_column_vector_or_scalar(y_val);
T_scale_val inv_sigma = 1 / as_array_or_scalar(sigma_val_vec);
// the most efficient way to calculate this depends on template parameters
double y_scaled_sq_sum;
Array<T_partials_return, Dynamic, 1> y_scaled(N_instances);
if (T_x_rows == 1) {
T_y_scaled_tmp y_scaled_tmp
= forward_as<T_y_scaled_tmp>((x_val * beta_val_vec)(0, 0));
y_scaled = (as_array_or_scalar(y_val_vec) - y_scaled_tmp
- as_array_or_scalar(alpha_val_vec))
* inv_sigma;
} else {
y_scaled = x_val * beta_val_vec;
y_scaled = (as_array_or_scalar(y_val_vec) - y_scaled
- as_array_or_scalar(alpha_val_vec))
* inv_sigma;
}
operands_and_partials<T_y, Matrix<T_x_scalar, T_x_rows, Dynamic>, T_alpha,
T_beta, T_scale>
ops_partials(y, x, alpha, beta, sigma);
if (!(is_constant_all<T_y, T_x_scalar, T_beta, T_alpha>::value)) {
Matrix<T_partials_return, Dynamic, 1> mu_derivative = inv_sigma * y_scaled;
if (!is_constant_all<T_y>::value) {
if (is_vector<T_y>::value) {
ops_partials.edge1_.partials_ = -mu_derivative;
} else {
ops_partials.edge1_.partials_[0] = -mu_derivative.sum();
}
}
if (!is_constant_all<T_x_scalar>::value) {
if (T_x_rows == 1) {
ops_partials.edge2_.partials_
= forward_as<Array<T_partials_return, Dynamic, T_x_rows>>(
beta_val_vec * sum(mu_derivative));
} else {
ops_partials.edge2_.partials_
= (beta_val_vec * mu_derivative.transpose()).transpose();
}
}
if (!is_constant_all<T_beta>::value) {
if (T_x_rows == 1) {
ops_partials.edge4_.partials_
= forward_as<Matrix<T_partials_return, 1, Dynamic>>(
mu_derivative.sum() * x_val);
} else {
ops_partials.edge4_.partials_ = mu_derivative.transpose() * x_val;
}
}
if (!is_constant_all<T_alpha>::value) {
if (is_vector<T_alpha>::value) {
ops_partials.edge3_.partials_ = mu_derivative;
} else {
ops_partials.edge3_.partials_[0] = sum(mu_derivative);
}
}
if (!is_constant_all<T_scale>::value) {
if (is_vector<T_scale>::value) {
Array<T_partials_return, Dynamic, 1> y_scaled_sq = y_scaled * y_scaled;
y_scaled_sq_sum = sum(y_scaled_sq);
ops_partials.edge5_.partials_ = (y_scaled_sq - 1) * inv_sigma;
} else {
y_scaled_sq_sum = sum(y_scaled * y_scaled);
ops_partials.edge5_.partials_[0]
= (y_scaled_sq_sum - N_instances) * forward_as<double>(inv_sigma);
}
} else {
y_scaled_sq_sum = sum(y_scaled * y_scaled);
}
} else {
y_scaled_sq_sum = sum(y_scaled * y_scaled);
}
if (!std::isfinite(y_scaled_sq_sum)) {
check_finite(function, "Vector of dependent variables", y);
check_finite(function, "Weight vector", beta);
check_finite(function, "Intercept", alpha);
// if all other checks passed, next will only fail if x is not finite
check_finite(function, "Matrix of independent variables", y_scaled_sq_sum);
}
// Compute log probability.
T_partials_return logp(0.0);
if (include_summand<propto>::value) {
logp += NEG_LOG_SQRT_TWO_PI * N_instances;
}
if (include_summand<propto, T_scale>::value) {
if (is_vector<T_scale>::value) {
logp -= sum(log(sigma_val_vec));
} else {
logp -= N_instances * log(forward_as<double>(sigma_val));
}
}
logp -= 0.5 * y_scaled_sq_sum;
return ops_partials.build(logp);
}
template <typename T_y, typename T_x, typename T_alpha, typename T_beta,
typename T_scale>
inline return_type_t<T_y, T_x, T_alpha, T_beta, T_scale> normal_id_glm_lpdf(
const T_y &y, const T_x &x, const T_alpha &alpha, const T_beta &beta,
const T_scale &sigma) {
return normal_id_glm_lpdf<false>(y, x, alpha, beta, sigma);
}
} // namespace math
} // namespace stan
#endif