Description
From @a-hurst on December 4, 2018 22:59
Summary:
Attempting to run a model that uses the von_mises_lpdf() function will cause the sampler to error out immediately if kappa values are too large (~10 or larger).
Description:
When trying to run a model that uses a von Mises distribution via the von_mises_lpdf() function, the sampler crashes immediately when kappa values are too high: the Boost function cyl_bessel_i raises an exception reporting a 'numeric overflow'. This appears to be due either to the way Stan uses the Boost function or to a bug in the cyl_bessel_i function itself.
This is an issue that has come up multiple times over the past few years, from several threads in the old Stan Google Groups page (1, 2, 3) and on the new Stan forum (4). To my knowledge, a formal bug report has yet to be filed for it. I'm new to Stan (just trying to make sure someone else's analysis is reproducible before I upload everything to OSF), so the linked threads above are probably a better source for info on the issue than what I've provided here.
Thanks in advance for the help!
Reproducible Steps:
Here is the model in full:
data{
  // Number of ids; sizes the id-level vectors and the rows of beta.
  int<lower=0> N ;
  // Number of trials; length of every trial-level array below.
  int<lower=0> L ;
  // Per-trial outcome given a normal likelihood in transformed parameters
  // (presumably log response time -- confirm with the data source).
  real lrt[L] ;
  // Per-trial angle given the von Mises / uniform mixture likelihood.
  real angle[L] ;
  // Per-trial index into the length-N id-level vectors. The upper bound makes an
  // out-of-range id fail at data load instead of as an index error while the
  // log-probability is being evaluated.
  int<lower=1,upper=N> id[L] ;
  // Per-trial cue condition; selects one of the three id-level cells.
  int<lower=1,upper=3> cue[L] ; // invalid/valid/neutral
}
transformed data{
  // Zero location vector for the multivariate student-t placed on each row of beta.
  vector[10] zeros = rep_vector(0,10) ;
  // log-probability of uniform component (it's data invariant), i.e. -log(2*pi);
  // passed to log_mix as the second mixture component.
  real neglog2pi = -log(2.0 * pi()) ;
}
parameters{
// Population-level location of each of the 10 id-level effects; added to the
// scaled beta columns in transformed parameters.
vector[10] within_means ;
// Population-level scale of each of the 10 effects; multiplies the matching
// column of beta in transformed parameters.
vector<lower=0>[10] within_sds ;
// correlation
// Passed as the matrix argument of multi_student_t for each row of beta in the model block.
corr_matrix[10] cor ;
//dummy variable for matt trick
// One row per id, one column per effect; each column k is rescaled as
// beta[,k] * within_sds[k] + within_means[k] in transformed parameters.
matrix[N,10] beta;
}
transformed parameters{
  // p[l] is the log-probability contribution of trial l; the model block adds it to target.
  vector[L] p ; // for storing log-probabilities associated with model
  {
    // id-level parameter values (one entry per id)
    vector[N] id_logit_rho_intercept ;
    vector[N] id_logit_rho_cuing1 ;
    vector[N] id_logit_rho_cuing2 ;
    vector[N] id_log_kappa_intercept ;
    vector[N] id_log_kappa_cuing1 ;
    vector[N] id_log_kappa_cuing2 ;
    vector[N] id_lrt_mean_intercept ;
    vector[N] id_lrt_mean_cuing1 ;
    vector[N] id_lrt_mean_cuing2 ;
    vector[N] id_lrt_sd ;
    // id-level cell values, one vector per cue condition (index matches cue[l])
    vector[N] id_logit_rho[3] ;
    vector[N] id_log_kappa[3] ;
    vector[N] id_lrt_mean[3] ;
    //useful transformations
    vector[N] id_kappa[3] ;
    vector[N] id_rho[3] ;

    //convert from beta scale to observed scale & add group effects
    // Column k of beta is scaled by within_sds[k] and shifted by within_means[k].
    id_logit_rho_intercept = beta[,1] * within_sds[1] + within_means[1] ;
    id_logit_rho_cuing1    = beta[,2] * within_sds[2] + within_means[2] ;
    id_logit_rho_cuing2    = beta[,3] * within_sds[3] + within_means[3] ;
    id_log_kappa_intercept = beta[,4] * within_sds[4] + within_means[4] ;
    id_log_kappa_cuing1    = beta[,5] * within_sds[5] + within_means[5] ;
    id_log_kappa_cuing2    = beta[,6] * within_sds[6] + within_means[6] ;
    id_lrt_mean_intercept  = beta[,7] * within_sds[7] + within_means[7] ;
    id_lrt_mean_cuing1     = beta[,8] * within_sds[8] + within_means[8] ;
    id_lrt_mean_cuing2     = beta[,9] * within_sds[9] + within_means[9] ;
    // The tenth effect is modelled on the log scale, so exponentiate to get a positive sd.
    id_lrt_sd = exp(beta[,10] * within_sds[10] + within_means[10]) ;

    //compute values for each cell
    id_lrt_mean[1] = id_lrt_mean_intercept + id_lrt_mean_cuing2 ;
    id_lrt_mean[2] = id_lrt_mean_intercept - id_lrt_mean_cuing1 ;
    id_lrt_mean[3] = id_lrt_mean_intercept ;
    // The rho and kappa cells use their own cuing effects. They previously reused
    // id_lrt_mean_cuing1/2, which left id_logit_rho_cuing* and id_log_kappa_cuing*
    // computed but never used.
    id_logit_rho[1] = id_logit_rho_intercept + id_logit_rho_cuing2 ;
    id_logit_rho[2] = id_logit_rho_intercept - id_logit_rho_cuing1 ;
    id_logit_rho[3] = id_logit_rho_intercept ;
    id_log_kappa[1] = id_log_kappa_intercept + id_log_kappa_cuing2 ;
    // NOTE(review): this cell adds cuing1 while the lrt_mean and logit_rho cells
    // subtract it; the sign is kept as originally written -- confirm it is intended.
    id_log_kappa[2] = id_log_kappa_intercept + id_log_kappa_cuing1 ;
    id_log_kappa[3] = id_log_kappa_intercept ;

    //compute the transforms
    for(i in 1:3){
      id_kappa[i] = exp(id_log_kappa[i]) ;
      for(n in 1:N){
        id_rho[i][n] = inv_logit(id_logit_rho[i][n]) ;
      }
    }

    //iterate over trials (this version doesn't have pre-computed id cell values)
    // Each trial contributes a normal log-density for lrt plus a two-component
    // mixture for angle: von Mises centred on pi() with weight rho, and the
    // constant neglog2pi term with weight 1 - rho.
    for(l in 1:L){
      // Disabled alternative: for kappa > 10, use a normal with sd sqrt(1/kappa)
      // in place of the von Mises component.
      // if (id_kappa[cue[l]][id[l]] > 10) {
      //   p[l] = normal_lpdf(lrt[l] | id_lrt_mean[cue[l]][id[l]], id_lrt_sd[id[l]])
      //     + log_mix(
      //       id_rho[cue[l]][id[l]]
      //       , normal_lpdf(angle[l] | pi(), sqrt(1/id_kappa[cue[l]][id[l]]))
      //       , neglog2pi
      //     );
      // } else {
      p[l] = normal_lpdf(lrt[l]|id_lrt_mean[cue[l]][id[l]], id_lrt_sd[id[l]])
        + log_mix(
          id_rho[cue[l]][id[l]]
          , von_mises_lpdf(angle[l]|pi(), id_kappa[cue[l]][id[l]])
          , neglog2pi
        );
      // }
    }
  }
}
model{
  // Priors on the population-level locations and scales of the ten effects.
  // within_sds is declared with lower=0, so its student-t is restricted to
  // non-negative values.
  within_means ~ student_t(4,0,1) ;
  within_sds ~ student_t(4,0,1) ;
  // NOTE(review): these two statements add a second density term for elements
  // 1 and 4 on top of the student_t(4,0,1) term above; they do not replace it.
  // Confirm that stacking both priors is intended.
  within_means[1] ~ student_t(4,3,3) ; //logit-rho intercept
  within_means[4] ~ student_t(4,3,3) ; //log-kappa intercept
  cor ~ lkj_corr(4) ;

  // Each id's row of beta is drawn from a multivariate student-t with df=4,
  // location zeros and matrix argument cor.
  for(n in 1:N){
    beta[n] ~ multi_student_t(4,zeros,cor) ;
  }

  // Add the per-trial log-probabilities computed in transformed parameters.
  target += p ;//used to be: increment_log_prob(p) ;
}
Here's the R code being used to run the model:
# Load packages ----
library(tidyverse)
library(rstan)

# Load Stan data ----
# The .rdata file is expected to define `data_for_stan`, which is passed to the
# sampler below (TODO confirm against the file contents).
load("endo_data_for_stan.rdata")

# Run Stan model 1 ----
# Compile the model, then sample. `pars = "p"` with `include = FALSE` drops the
# per-trial log-probability vector `p` from the saved draws.
mod <- rstan::stan_model("_endo_model.stan")
post <- sampling(
  mod,
  data = data_for_stan,
  pars = c("p"),
  include = FALSE,
  chains = 10,
  cores = 10,
  iter = 2000,
  refresh = 1,
  verbose = TRUE,
  control = list(
    adapt_delta = 0.99
    # , max_treedepth = 15
  )
)
I also have an .RData file containing all the data for the model, but I'm not sure how best to share it.
Current Output:
Here's what the output looks like (number of chains dropped to 1 for readability):
CHECKING DATA AND PREPROCESSING FOR MODEL '_endo_model' NOW.
COMPILING MODEL '_endo_model' NOW.
STARTING SAMPLER FOR MODEL '_endo_model' NOW.
SAMPLING FOR MODEL '_endo_model' NOW (CHAIN 1).
Chain 1: Unrecoverable error evaluating the log probability at the initial value.
Chain 1: Exception: Error in function boost::math::cyl_bessel_i<double>(double,double): numeric overflow (in 'model1b1d79c30c67__endo_model' at line 120)
[1] "Error in sampler$call_sampler(args_list[[i]]) : "
[2] " Exception: Error in function boost::math::cyl_bessel_i<double>(double,double): numeric overflow (in 'model1b1d79c30c67__endo_model' at line 120)"
error occurred during calling the sampler; sampling not done
RStan Version:
2.18.2
R Version:
R version 3.5.1 (2018-07-02)
Operating System:
macOS 10.14.1
Copied from original issue: stan-dev/rstan#593