diff --git a/.Rprofile b/.Rprofile new file mode 100644 index 0000000..3b70f2c --- /dev/null +++ b/.Rprofile @@ -0,0 +1,3 @@ +source("~/.Rprofile") +# Allows to change how all vignettes are run at once (especially to test rstan) +options("SBC.vignettes_cmdstanr" = TRUE) diff --git a/.gitignore b/.gitignore index 90709c8..ab27f4a 100755 --- a/.gitignore +++ b/.gitignore @@ -7,3 +7,5 @@ Meta *.exe *_SBC_cache +vignettes/stan/*.rds +vignettes/small_model_workflow/*.rds diff --git a/DESCRIPTION b/DESCRIPTION index ce1cbdd..0008c19 100755 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: SBC Title: Simulation Based Calibration for rstan/cmdstanr models -Version: 0.0.1.9000 +Version: 0.1.1.9000 Authors@R: c(person(given = "Shinyoung", family = "Kim", @@ -16,10 +16,18 @@ Authors@R: family = "Modrák", role = c("aut"), email = "martin.modrak@biomed.cas.cz", - comment = c())) -Description: SBC helps perform Simulation Based Calibration on models using Stan or brms. - It contains various classes and functions to extract samples from models, calculate SBC metrics and visualize utility plots. + comment = c(ORCID= "0000-0002-8886-7797")), + person(given = "Teemu", + family = "Säilynoja", + role = "aut") + ) +Description: SBC helps perform Simulation Based Calibration on Bayesian models. + SBC lets you check for bugs in your model code and/or algorithm that fits + the model. SBC focuses on models built with 'Stan' , + but can support other modelling languages as well. License: MIT + file LICENSE +URL: https://hyunjimoon.github.io/SBC/, https://github.com/hyunjimoon/SBC/ +BugReports: https://github.com/hyunjimoon/SBC/issues Encoding: UTF-8 LazyData: true Roxygen: list(markdown = TRUE) @@ -42,5 +50,6 @@ Suggests: rstan, knitr, rmarkdown, - brms + brms, + mclust VignetteBuilder: knitr diff --git a/NAMESPACE b/NAMESPACE old mode 100755 new mode 100644 index 7124629..d7bab22 --- a/NAMESPACE +++ b/NAMESPACE @@ -6,22 +6,32 @@ S3method(SBC_backend_default_thin_ranks,default) S3method(SBC_backend_hash_for_cache,SBC_backend_brms) S3method(SBC_backend_hash_for_cache,SBC_backend_cmdstan_sample) S3method(SBC_backend_hash_for_cache,SBC_backend_cmdstan_variational) +S3method(SBC_backend_hash_for_cache,SBC_backend_rjags) +S3method(SBC_backend_hash_for_cache,SBC_backend_rstan_optimizing) S3method(SBC_backend_hash_for_cache,SBC_backend_rstan_sample) S3method(SBC_backend_hash_for_cache,default) -S3method(SBC_backend_iid_samples,SBC_backend_cmdstan_variational) -S3method(SBC_backend_iid_samples,default) +S3method(SBC_backend_iid_draws,SBC_backend_cmdstan_variational) +S3method(SBC_backend_iid_draws,SBC_backend_mock_rng) +S3method(SBC_backend_iid_draws,SBC_backend_rstan_optimizing) +S3method(SBC_backend_iid_draws,default) S3method(SBC_fit,SBC_backend_brms) S3method(SBC_fit,SBC_backend_cmdstan_sample) S3method(SBC_fit,SBC_backend_cmdstan_variational) S3method(SBC_fit,SBC_backend_mock) +S3method(SBC_fit,SBC_backend_mock_rng) +S3method(SBC_fit,SBC_backend_rjags) +S3method(SBC_fit,SBC_backend_rstan_optimizing) S3method(SBC_fit,SBC_backend_rstan_sample) S3method(SBC_fit_to_diagnostics,CmdStanMCMC) S3method(SBC_fit_to_diagnostics,CmdStanVB) +S3method(SBC_fit_to_diagnostics,RStanOptimizingFit) S3method(SBC_fit_to_diagnostics,brmsfit) S3method(SBC_fit_to_diagnostics,default) S3method(SBC_fit_to_diagnostics,stanfit) S3method(SBC_fit_to_draws_matrix,CmdStanMCMC) S3method(SBC_fit_to_draws_matrix,CmdStanVB) +S3method(SBC_fit_to_draws_matrix,RStanOptimizingFit) +S3method(SBC_fit_to_draws_matrix,SBC_rjags_fit) 
S3method(SBC_fit_to_draws_matrix,brmsfit) S3method(SBC_fit_to_draws_matrix,default) S3method(check_all_SBC_diagnostics,SBC_results) @@ -31,6 +41,8 @@ S3method(generate_datasets,SBC_generator_custom) S3method(generate_datasets,SBC_generator_function) S3method(get_diagnostic_messages,SBC_ADVI_diagnostics) S3method(get_diagnostic_messages,SBC_ADVI_diagnostics_summary) +S3method(get_diagnostic_messages,SBC_RStanOptimizing_diagnostics) +S3method(get_diagnostic_messages,SBC_RStanOptimizing_diagnostics_summary) S3method(get_diagnostic_messages,SBC_results) S3method(get_diagnostic_messages,SBC_results_summary) S3method(length,SBC_datasets) @@ -39,6 +51,8 @@ S3method(plot_contraction,SBC_results) S3method(plot_contraction,data.frame) S3method(plot_coverage,SBC_results) S3method(plot_coverage,data.frame) +S3method(plot_coverage_diff,SBC_results) +S3method(plot_coverage_diff,data.frame) S3method(plot_rank_hist,SBC_results) S3method(plot_rank_hist,data.frame) S3method(plot_sim_estimated,SBC_results) @@ -46,6 +60,7 @@ S3method(plot_sim_estimated,data.frame) S3method(print,SBC_nuts_diagnostics_summary) S3method(print,SBC_results_summary) S3method(summary,SBC_ADVI_diagnostics) +S3method(summary,SBC_RStanOptimizing_diagnostics) S3method(summary,SBC_nuts_diagnostics) S3method(summary,SBC_results) export(SBC_backend_brms) @@ -54,18 +69,26 @@ export(SBC_backend_cmdstan_sample) export(SBC_backend_cmdstan_variational) export(SBC_backend_default_thin_ranks) export(SBC_backend_hash_for_cache) -export(SBC_backend_iid_samples) +export(SBC_backend_iid_draws) export(SBC_backend_mock) +export(SBC_backend_mock_rng) +export(SBC_backend_rjags) +export(SBC_backend_rstan_optimizing) export(SBC_backend_rstan_sample) export(SBC_datasets) export(SBC_diagnostic_messages) +export(SBC_example_backend) +export(SBC_example_generator) +export(SBC_example_results) export(SBC_fit) export(SBC_fit_to_diagnostics) export(SBC_fit_to_draws_matrix) export(SBC_generator_brms) export(SBC_generator_custom) export(SBC_generator_function) +export(SBC_print_example_model) export(SBC_results) +export(SBC_statistics_from_single_fit) export(bind_datasets) export(bind_results) export(calculate_prior_sd) @@ -73,6 +96,7 @@ export(calculate_ranks_draws_matrix) export(calculate_sds_draws_matrix) export(check_all_SBC_diagnostics) export(cjs_dist) +export(compute_SBC) export(compute_gen_quants) export(compute_results) export(data_for_ecdf_plots) @@ -84,17 +108,26 @@ export(empirical_coverage) export(generate_datasets) export(generated_quantities) export(get_diagnostic_messages) +export(invtf_param_vec) export(max_diff) export(plot_contraction) export(plot_coverage) +export(plot_coverage_diff) export(plot_ecdf) export(plot_ecdf_diff) export(plot_rank_hist) export(plot_sim_estimated) export(rank2unif) +export(recompute_SBC_statistics) export(recompute_statistics) +export(rnorm_max_coupling) +export(self_calib_adaptive) +export(self_calib_gaussian) +export(self_calib_gmm) export(set2set) -export(statistics_from_single_fit) +export(tf_param) +export(tf_param_vec) +export(update_quantile_approximation) export(validate_SBC_datasets) export(validate_SBC_results) export(validate_generated_quantities) diff --git a/R/SBC-deprecated.R b/R/SBC-deprecated.R new file mode 100644 index 0000000..fde8510 --- /dev/null +++ b/R/SBC-deprecated.R @@ -0,0 +1,8 @@ +#' @title Deprecated functions in package \pkg{SBC}. +#' @description The functions listed below are deprecated and will be defunct in +#' the near future. 
When possible, alternative functions with similar
+#' functionality are also mentioned. Help pages for deprecated functions are
+#' available at \code{help("-deprecated")}.
+#' @name SBC-deprecated
+#' @keywords internal
+NULL
diff --git a/R/backend-mock.R b/R/backend-mock.R
index 2e1232b..4159c18 100644
--- a/R/backend-mock.R
+++ b/R/backend-mock.R
@@ -38,3 +38,42 @@ SBC_fit.SBC_backend_mock <- function(backend, generated, cores) {
   backend$result
 }
 
+#' @export
+SBC_backend_mock_rng <- function(..., n_draws = 1000) {
+  var_to_rng <- list(...)
+  if(is.null(names(var_to_rng)) ||
+     length(unique(names(var_to_rng))) != length(var_to_rng) ||
+     any(names(var_to_rng) == "")
+  ) {
+    stop("All arguments must have a unique name")
+  }
+  var_to_rng <- purrr::map(var_to_rng, purrr::as_mapper)
+
+  purrr::iwalk(var_to_rng, function(rng, name) {
+    tryCatch({
+      res <- rng(13)
+    }, error = function(e) {
+      message(e)
+      stop("Test invocation for argument '", name, "' failed.\n",
+           "All arguments must be convertible to a function that takes the number of draws as input and returns a 1D array of draws.")
+    })
+    if(!is.numeric(res) || length(res) != 13) {
+      stop("Test invocation for argument '", name, "' returned unexpected result.\n",
+           "All arguments must be convertible to a function that takes the number of draws as input and returns a 1D array of draws.")
+    }
+  })
+
+  structure(list(var_to_rng = var_to_rng, n_draws = n_draws), class = "SBC_backend_mock_rng")
+}
+
+#' @export
+SBC_fit.SBC_backend_mock_rng <- function(backend, generated, cores) {
+  draws_list <- purrr::map(backend$var_to_rng, ~ .x(backend$n_draws))
+
+  do.call(posterior::draws_matrix, draws_list)
+}
+
+#' @export
+SBC_backend_iid_draws.SBC_backend_mock_rng <- function(backend) {
+  TRUE
+}
diff --git a/R/backend-rjags.R b/R/backend-rjags.R
new file mode 100644
index 0000000..21ca8fb
--- /dev/null
+++ b/R/backend-rjags.R
@@ -0,0 +1,62 @@
+#' Create a JAGS backend using `rjags`
+#'
+#' @param file model file or connection to model code (passed to [rjags::jags.model()])
+#' @param n.iter number of iterations for sampling (passed to [rjags::coda.samples()])
+#' @param n.burnin number of iterations used for burn-in
+#' @param variable.names names of variables to monitor (passed to [rjags::coda.samples()])
+#' @param thin thinning (passed to [rjags::coda.samples()])
+#' @param na.rm whether to omit variables containing NA (passed to [rjags::coda.samples()])
+#' @param ... additional optional arguments passed to [rjags::jags.model()]
+#'   - most notably `n.chains`, `n.adapt` and `inits`.
+#' @export
+SBC_backend_rjags <- function(file, n.iter, n.burnin, variable.names, thin = 1, na.rm = TRUE, ...) {
+  args <- list(...)
+ if(any(names(args) == "data")) { + stop(paste0("Argument 'data' cannot be provided when defining a backend", + " as it needs to be set by the SBC package")) + } + + structure(list(file = file, + n.iter = n.iter, + variable.names = variable.names, + n.burnin = n.burnin, + thin = thin, + na.rm = na.rm, + args = args), class = "SBC_backend_rjags") +} + + +#' @export +SBC_fit.SBC_backend_rjags <- function(backend, generated, cores) { + args_all <- c(list(file = backend$file, data = generated), backend$args) + + model <- do.call(rjags::jags.model, args_all) + if(backend$n.burnin > 0) { + stats::update(model, n.iter = backend$n.burnin, progress.bar = "none") + } + samples <- rjags::coda.samples(model, + variable.names = backend$variable.names, + n.iter = backend$n.iter, + thin = backend$thin, + na.rm = backend$na.rm, + progress.bar = "none") + + structure(list(model = model, samples = samples), + class = "SBC_rjags_fit") +} + + +#' @export +SBC_fit_to_draws_matrix.SBC_rjags_fit <- function(fit) { + posterior::as_draws_matrix(fit$samples) +} + +#' @export +SBC_backend_hash_for_cache.SBC_backend_rjags <- function(backend) { + model_code <- readLines(backend$file) + + backend_for_cache <- backend + backend_for_cache$file <- NULL + backend_for_cache$model_code <- model_code + rlang::hash(backend_for_cache) +} diff --git a/R/backends.R b/R/backends.R index c9b4dd0..0912c14 100644 --- a/R/backends.R +++ b/R/backends.R @@ -1,6 +1,6 @@ -#' Use backend to fit a model to data. +#' S3 generic using backend to fit a model to data. #' -#' S3 generic, needs to be implemented by all backends. +#' Needs to be implemented by all backends. #' All implementations have to return an object for which you can safely #' call [SBC_fit_to_draws_matrix()] and get some draws. #' If that's not possible an error should be raised. @@ -9,11 +9,18 @@ SBC_fit <- function(backend, generated, cores) { UseMethod("SBC_fit") } +#' S3 generic converting a fitted model to a `draws_matrix` object. +#' +#' Needs to be implemented for all types of objects the backend can +#' return from [SBC_fit()]. Default implementation just calls, +#' [posterior::as_draws_matrix()], so if the fit object already supports +#' this, it will work out of the box. #' @export SBC_fit_to_draws_matrix <- function(fit) { UseMethod("SBC_fit_to_draws_matrix") } +#' @rdname SBC_fit_to_draws_matrix #' @export SBC_fit_to_draws_matrix.default <- function(fit) { posterior::as_draws_matrix(fit) @@ -55,32 +62,32 @@ SBC_backend_hash_for_cache.default <- function(backend) { rlang::hash(backend) } -#' S3 generic to let backends signal that they produced independent samples. +#' S3 generic to let backends signal that they produced independent draws. #' #' Most backends (e.g. those based on variatns of MCMC) don't produce -#' independent samples and thus diagnostics like Rhat and ESS are important -#' and samples may need thinning. Backends that already produce independent -#' samples (e.g. ADVI/optimizing) can implement this method to return `TRUE` +#' independent draws and thus diagnostics like Rhat and ESS are important +#' and draws may need thinning. Backends that already produce independent +#' draws (e.g. ADVI/optimizing) can implement this method to return `TRUE` #' to signal this is the case. If this method returns `TRUE`, ESS and Rhat will #' always attain their best possible values and [SBC_backend_default_thin_ranks()] #' will return `1`. #' The default implementation returns `FALSE`. 
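+#'
+#' For instance, a hypothetical backend class `my_iid_backend` that already
+#' produces independent draws could opt in with a one-line method (a minimal
+#' sketch; the class name is purely illustrative):
+#'
+#' ```r
+#' SBC_backend_iid_draws.my_iid_backend <- function(backend) {
+#'   TRUE
+#' }
+#' ```
+#'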
 #' @param backend to check
 #' @export
-SBC_backend_iid_samples <- function(backend) {
-  UseMethod("SBC_backend_iid_samples")
+SBC_backend_iid_draws <- function(backend) {
+  UseMethod("SBC_backend_iid_draws")
 }
 
-#' @rdname SBC_backend_iid_samples
+#' @rdname SBC_backend_iid_draws
 #' @export
-SBC_backend_iid_samples.default <- function(backend) {
+SBC_backend_iid_draws.default <- function(backend) {
   FALSE
 }
 
 #' S3 generic to get backend-specific default thinning for rank computation.
 #'
 #' The default implementation plays it relatively safe and returns 10, unless
-#' [SBC_backend_iid_samples()] returns `TRUE` in which case it returns 1.
+#' [SBC_backend_iid_draws()] returns `TRUE` in which case it returns 1.
 #'
 #' @export
 SBC_backend_default_thin_ranks <- function(backend) {
@@ -90,7 +97,7 @@ SBC_backend_default_thin_ranks <- function(backend) {
 #' @rdname SBC_backend_default_thin_ranks
 #' @export
 SBC_backend_default_thin_ranks.default <- function(backend) {
-  if(SBC_backend_iid_samples(backend)) {
+  if(SBC_backend_iid_draws(backend)) {
     1
   } else {
     10
@@ -125,7 +132,7 @@ SBC_backend_rstan_sample <- function(model, ...) {
 
 #' @export
 SBC_fit.SBC_backend_rstan_sample <- function(backend, generated, cores) {
-  do.call(rstan::sampling,
+  fit <- do.call(rstan::sampling,
           combine_args(list(object = backend$model,
                             data = generated,
                             ## TODO: Forcing a single core until we can capture output with multiple cores
@@ -133,6 +140,12 @@ SBC_fit.SBC_backend_rstan_sample <- function(backend, generated, cores) {
                             cores = 1),
                        backend$args
           ))
+
+  if(fit@mode != 0) {
+    stop("Fit does not contain draws.")
+  }
+
+  fit
 }
 
 #' @export
@@ -154,6 +167,130 @@ SBC_backend_hash_for_cache.SBC_backend_rstan_sample <- function(backend) {
   rlang::hash(list(model = backend$model@model_code, args = backend$args))
 }
 
+
+#' SBC backend using the `optimizing` method from `rstan`.
+#'
+#' @param model a `stanmodel` object (created via `rstan::stan_model`)
+#' @param ... other arguments passed to `optimizing` (number of iterations, ...).
+#'   Argument `data` cannot be set this way as it needs to be
+#'   controlled by the package.
+#' @param n_retries_hessian the number of times the backend is allowed to retry optimization
+#'   (with a different seed) to produce a usable Hessian that can produce draws. In some cases,
+#'   the Hessian may be numerically unstable and not be positive definite.
+#' @export
+SBC_backend_rstan_optimizing <- function(model, ..., n_retries_hessian = 1) {
+  stopifnot(inherits(model, "stanmodel"))
+  n_retries_hessian <- as.integer(n_retries_hessian)
+  stopifnot(length(n_retries_hessian) == 1)
+  stopifnot(n_retries_hessian > 0)
+
+  args <- list(...)
+ unacceptable_params <- c("data", "hessian") + if(any(names(args) %in% unacceptable_params)) { + stop(paste0("Parameters ", paste0("'", unacceptable_params, "'", collapse = ", "), + " cannot be provided when defining a backend as they need to be set ", + "by the SBC package")) + } + + args$hessian <- TRUE + if(is.null(args$draws)) { + args$draws <- 1000 + } else if(args$draws <= 1) { + stop("Cannot use optimizing backend with less than 2 draws") + } + structure(list(model = model, args = args, n_retries_hessian = n_retries_hessian), class = "SBC_backend_rstan_optimizing") +} + + +#' @export +SBC_fit.SBC_backend_rstan_optimizing <- function(backend, generated, cores) { + for(attempt in 1:backend$n_retries_hessian) { + start <- proc.time() + fit <- do.call(rstan::optimizing, + combine_args(list(object = backend$model, + data = generated), + backend$args) + ) + end <- proc.time() + fit$time <- (end - start)["elapsed"] + + if(fit$return_code != 0) { + stop("Optimizing was not succesful") + } + # This signals production of draws was OK + if(nrow(fit$theta_tilde) > 1) { + break; + } + } + + fit$n_attempts <- attempt + + if(nrow(fit$theta_tilde) == 1) { + stop("Optimizing did not return draws.\n", + "This is most likely due to numerical problems with the Hessian, check model output.\n", + "You may also consider increasing `n_retries_hessian`") + } + + + structure(fit, class = "RStanOptimizingFit") +} + +#' @export +SBC_backend_hash_for_cache.SBC_backend_rstan_optimizing <- function(backend) { + rlang::hash(list(model = backend$model@model_code, args = backend$args)) +} + +#' @export +SBC_fit_to_draws_matrix.RStanOptimizingFit <- function(fit) { + posterior::as_draws_matrix(fit$theta_tilde) +} + + +#' @export +SBC_backend_iid_draws.SBC_backend_rstan_optimizing <- function(backend) { + TRUE +} + +#' @export +SBC_fit_to_diagnostics.RStanOptimizingFit <- function(fit, fit_output, fit_messages, fit_warnings) { + res <- data.frame( + time = fit$time, + n_attempts = fit$n_attempts + ) + + class(res) <- c("SBC_RStanOptimizing_diagnostics", class(res)) + res +} + +#' @export +summary.SBC_RStanOptimizing_diagnostics <- function(x) { + summ <- list( + n_fits = nrow(x), + max_time = max(x$time), + n_multiple_attempts = sum(x$n_attempts > 1) + ) + + structure(summ, class = "SBC_RStanOptimizing_diagnostics_summary") +} + +#' @export +get_diagnostic_messages.SBC_RStanOptimizing_diagnostics <- function(x) { + get_diagnostic_messages(summary(x)) +} + + +#' @export +get_diagnostic_messages.SBC_RStanOptimizing_diagnostics_summary <- function(x) { + SBC_diagnostic_messages( + rbind( + data.frame(ok = TRUE, message = paste0("Maximum time was ", x$max_time, " sec.")), + data.frame(ok = x$n_multiple_attempts == 0, + message = paste0( x$n_multiple_attempts, " (", round(100 * x$n_multiple_attempts / x$n_fits), + "%) of fits required multiple attempts to produce usable Hessian.")) + ) + ) +} + #' @export summary.SBC_nuts_diagnostics <- function(diagnostics) { summ <- list( @@ -168,13 +305,13 @@ summary.SBC_nuts_diagnostics <- function(diagnostics) { ) if(!is.null(diagnostics$min_bfmi)) { - summ$has_low_bfmi = sum(diagnostics$min_bfmi < 0.2) + summ$has_low_bfmi = sum(is.na(diagnostics$min_bfmi) | diagnostics$min_bfmi < 0.2) } if(!is.null(diagnostics$n_failed_chains)) { if(any(is.na(diagnostics$n_failed_chains))) { - problematic_fit_ids <- paste0(which(is.na(diagnostics$n_failed_chains)), collapse = ", ") - warning("Fits for datasets ", problematic_fit_ids, " had NA for n_failed_chains.") + problematic_sim_ids <- 
paste0(which(is.na(diagnostics$n_failed_chains)), collapse = ", ") + warning("Fits for simulations ", problematic_sim_ids, " had NA for n_failed_chains.") } summ$has_failed_chains = sum(is.na(diagnostics$n_failed_chains) | diagnostics$n_failed_chains > 0) } @@ -418,16 +555,7 @@ SBC_fit.SBC_backend_cmdstan_variational <- function(backend, generated, cores) { } } - #Re-emit outputs, warnings, messages - for(i in 1:length(fit_outputs)) { - cat(fit_outputs[[i]]$output, sep = "\n") - for(m in 1:length(fit_outputs[[i]]$messages)) { - message(fit_outputs[[i]]$messages[m]) - } - for(w in 1:length(fit_outputs[[i]]$warnings)) { - warning(fit_outputs[[i]]$warnings[w]) - } - } + reemit_captured(fit_outputs[[i]]) if(all(fit$return_codes() != 0)) { stop("Variational inference did not finish succesfully") @@ -449,7 +577,7 @@ SBC_fit_to_draws_matrix.CmdStanVB <- function(fit) { } #' @export -SBC_backend_iid_samples.SBC_backend_cmdstan_variational <- function(backend) { +SBC_backend_iid_draws.SBC_backend_cmdstan_variational <- function(backend) { TRUE } @@ -521,8 +649,18 @@ new_SBC_backend_brms <- function(compiled_model, ) { require_brms_version("brms backend") - arg_names_for_stan <- c("chains", "inits", "iter", "warmup", "thin") + arg_names_for_stan <- c("chains", "inits", "init", "iter", "warmup", "thin") args_for_stan <- args[intersect(names(args), arg_names_for_stan)] + + args_for_stan_renames <- c("inits" = "init") + for(i in 1:length(args_for_stan_renames)) { + orig <- names(args_for_stan_renames)[i] + new <- args_for_stan_renames[i] + if(!is.null(args_for_stan[[orig]])) { + args_for_stan[[new]] <- args_for_stan[[orig]] + args_for_stan[[orig]] <- NULL + } + } stan_backend <- sampling_backend_from_stanmodel(compiled_model, args_for_stan) structure(list(stan_backend = stan_backend, args = args), class = "SBC_backend_brms") @@ -544,13 +682,21 @@ validate_SBC_backend_brms_args <- function(args) { #' Build a backend based on the `brms` package. #' #' @param ... arguments passed to `brm`. -#' @param template_dataset a representative dataset that can be used to generate code. -#' @export -SBC_backend_brms <- function(..., template_dataset) { +#' @param template_data a representative value for the `data` argument in `brm` +#' that can be used to generate code. +#' @param template_dataset DEPRECATED. Use `template_data` +#' @export +SBC_backend_brms <- function(..., template_data, template_dataset = NULL) { + if(!is.null(template_dataset)) { + warning("Argument 'template_dataset' is deprecated, use 'template_data' instead") + if(missing(template_data)) { + template_data <- template_dataset + } + } args = list(...) validate_SBC_backend_brms_args(args) - stanmodel <- stanmodel_for_brms(data = template_dataset, ...) + stanmodel <- stanmodel_for_brms(data = template_data, ...) new_SBC_backend_brms(stanmodel, args) } diff --git a/R/calculate.R b/R/calculate.R index 63ed568..8815a12 100755 --- a/R/calculate.R +++ b/R/calculate.R @@ -174,13 +174,13 @@ ranks_to_empirical_pit <- function(ranks, n_posterior_samples){ #' then all intervals are well calibrated. #' #' @param stats a data.frame of rank statistics (e.g. as returned in the `$stats` component of [SBC_results]), -#' at minimum should have at least `parameter`, `rank` and `max_rank` columns) +#' at minimum should have at least `variable`, `rank` and `max_rank` columns) #' @param width a vector of values between 0 and 1 representing widths of credible intervals for #' which we compute coverage. 
#' @param prob determines width of the uncertainty interval around the observed coverage #' @param inteval_type `"central"` to show coverage of central credible intervals #' or `"leftmost"` to show coverage of leftmost credible intervals (i.e. the observed CDF). -#' @return A `data.frame` with columns `parameter`, `width` (width of the interval as given +#' @return A `data.frame` with columns `variable`, `width` (width of the interval as given #' in the `width` parameter), `width_represented` the closest width that can be represented by #' the ranks in the input (any discrepancy needs to be judged against this rather than `width`), #' `estimate` - observed coverage for the interval, `ci_low`, `ci_high` the uncertainty @@ -188,9 +188,18 @@ ranks_to_empirical_pit <- function(ranks, n_posterior_samples){ #' @seealso [plot_coverage()] #' @export empirical_coverage <- function(stats, width, prob = 0.95, interval_type = "central") { - if(!all(c("parameter", "rank", "max_rank") %in% names(stats))) { + stopifnot(is.data.frame(stats)) + # Ensuring backwards compatibility + if("parameter" %in% names(stats)) { + if(!("variable" %in% names(stats))) { + warning("The stats parameter contains a `parameter` column, which is deprecated, use `variable` instead.") + stats$variable <- stats$parameter + } + } + + if(!all(c("variable", "rank", "max_rank") %in% names(stats))) { stop(SBC_error("SBC_invalid_argument_error", - "The stats data.frame needs a 'parameter', 'rank' and 'max_rank' columns")) + "The stats data.frame needs a 'variable', 'rank' and 'max_rank' columns")) } stopifnot(is.numeric(width)) @@ -217,7 +226,7 @@ empirical_coverage <- function(stats, width, prob = 0.95, interval_type = "centr is_covered = rank >= low_rank & rank <= high_rank) summ <- dplyr::summarise( - dplyr::group_by(long, parameter, width), + dplyr::group_by(long, variable, width), post_alpha = sum(is_covered) + 1, post_beta = dplyr::n() - sum(is_covered) + 1, width_represented = unique(width_represented), diff --git a/R/datasets.R b/R/datasets.R index 7838e9b..698f83a 100644 --- a/R/datasets.R +++ b/R/datasets.R @@ -1,7 +1,12 @@ -new_SBC_datasets <- function(parameters, generated) { - +new_SBC_datasets <- function(variables, generated, parameters = NULL) { + if(!is.null(parameters)) { + warning("The `parameters` argument is deprecated use `variables` instead.") + if(missing(variables)) { + variables <- parameters + } + } - structure(list(parameters = parameters, + structure(list(variables = variables, generated = generated), class = "SBC_datasets") } @@ -10,20 +15,26 @@ new_SBC_datasets <- function(parameters, generated) { validate_SBC_datasets <- function(x) { stopifnot(is.list(x)) stopifnot(inherits(x, "SBC_datasets")) - if(!posterior::is_draws_matrix(x$parameters)) { - stop("SBC_datasets object has to have a 'parameters' field of type draws_matrix") + if(!is.null(x$parameters)) { + warning("Encountered old version of datasets using `parameters`, which is deprecated, will rename to `variables`.") + if(is.null(x$variables)) { + x$variables <- x$parameters + } + } + if(!posterior::is_draws_matrix(x$variables)) { + stop("SBC_datasets object has to have a 'variables' field of type draws_matrix") } if(!is.list(x$generated)) { stop("SBC_datasets object has to have a 'generated' field of type list") } - if(posterior::nchains(x$parameters) != 1) { - stop("Needs one chain") + if(posterior::nchains(x$variables) != 1) { + stop("The `variables` draws_matrix needs exactly one chain.") } - if(posterior::ndraws(x$parameters) != 
length(x$generated)) { - stop("Needs equal no. of draws for parameters and length of generated") + if(posterior::ndraws(x$variables) != length(x$generated)) { + stop("Needs equal no. of draws for variables and length of generated") } x @@ -34,13 +45,20 @@ validate_SBC_datasets <- function(x) { #' In most cases, you may want to use `generate_datasets` to build the object, but #' for full control, you can also create datasets directly via this function. #' -#' @param parameters samples of "true" values of unobserved parameters. +#' @param variables draws of "true" values of unobserved parameters or other derived variables. #' An object of class `draws_matrix` (from the `posterior` package) #' @param generated a list of objects that can be passed as data to the backend you plan to use. #' (e.g. list of values for Stan-based backends, a data frame for `SBC_backend_brms`) +#' @param parameters DEPRECATED. Use variables instead. #' @export -SBC_datasets <- function(parameters, generated) { - x <- new_SBC_datasets(parameters, generated) +SBC_datasets <- function(variables, generated, parameters = NULL) { + if(!is.null(parameters)) { + warning("The `parameters` argument is deprecated use `variables` instead.") + if(missing(variables)) { + variables <- parameters + } + } + x <- new_SBC_datasets(variables, generated) validate_SBC_datasets(x) x } @@ -48,7 +66,7 @@ SBC_datasets <- function(parameters, generated) { #' @export length.SBC_datasets <- function(x) { validate_SBC_datasets(x) - posterior::ndraws(x$parameters) + posterior::ndraws(x$variables) } #' Subset an `SBC_datasets` object. @@ -56,7 +74,7 @@ length.SBC_datasets <- function(x) { #' @export `[.SBC_datasets` <- function(x, indices) { validate_SBC_datasets(x) - new_SBC_datasets(posterior::subset_draws(x$parameters, draw = indices, unique = FALSE), + new_SBC_datasets(posterior::subset_draws(x$variables, draw = indices, unique = FALSE), x$generated[indices]) } @@ -69,10 +87,10 @@ bind_datasets <- function(...) { purrr::walk(args, validate_SBC_datasets) #TODO check identical par names - parameters_list <- purrr::map(args, function(x) x$parameters) + variables_list <- purrr::map(args, function(x) x$variables) generated_list <- purrr::map(args, function(x) x$generated) - new_SBC_datasets(do.call(posterior::bind_draws, c(parameters_list, list(along = "draw"))), + new_SBC_datasets(do.call(posterior::bind_draws, c(variables_list, list(along = "draw"))), do.call(c, generated_list)) } @@ -80,16 +98,18 @@ bind_datasets <- function(...) { #' #' @param generator a generator object - build e.g. via `SBC_generator_function` or #' `SBC_generator_brms`. +#' @param n_sims the number of simulated datasets to use +#' @param n_datasets DEPRECATED, use `n_sims` instead. #' @return object of class `SBC_datasets` #' TODO: seed #' @export -generate_datasets <- function(generator, n_datasets) { +generate_datasets <- function(generator, n_sims, n_datasets = NULL) { UseMethod("generate_datasets") } #' Generate datasets via a function that creates a single dataset. #' -#' @param f function returning a list with elements `parameters` +#' @param f function returning a list with elements `variables` #' (prior draws, a list or anything that can be converted to `draws_rvars`) and #' `generated` (observed dataset, ready to be passed to backend) #' @param ... Additional arguments passed to `f` @@ -101,23 +121,41 @@ SBC_generator_function <- function(f, ...) 
{ #' @export -generate_datasets.SBC_generator_function <- function(generator, n_datasets) { - parameters_list <- list() +generate_datasets.SBC_generator_function <- function(generator, n_sims, n_datasets = NULL) { + if(!is.null(n_datasets)) { + warning("n_datasets argument is deprecated, use n_sims instead") + if(missing(n_sims)) { + n_sims <- n_datasets + } + } + variables_list <- list() generated <- list() - for(iter in 1:n_datasets){ + warned_parameters <- FALSE + for(iter in 1:n_sims){ generator_output <- do.call(generator$f, generator$args) + # Ensuring backwards compatibility + if(!is.null(generator_output$parameters)) { + if(!warned_parameters) { + warning("Generator function returns a list with element `parameters`, which is deprecated. Return `variables` instead.") + warned_parameters <- TRUE + } + if(is.null(generator_output$variables)) { + generator_output$variables <- generator_output$parameters + } + } + if(!is.list(generator_output) || - is.null(generator_output$parameters) || + is.null(generator_output$variables) || is.null(generator_output$generated)) { stop(SBC_error("SBC_datasets_error", - "The generating function has to return a list with elements `parameters` - (that can be converted to `draws_rvars`) `generated`")) + "The generating function has to return a list with elements `variables` + (that can be converted to `draws_rvars`) and `generated`")) } - parnames <- names(generator_output$parameters) - if(is.null(parnames) || any(is.na(parnames)) || - any(parnames == "") || length(unique(parnames)) != length(parnames)) { - stop(SBC_error("SBC_datasets_error", "All elements of $parameters must have a unique name")) + varnames <- names(generator_output$variables) + if(is.null(varnames) || any(is.na(varnames)) || + any(varnames == "") || length(unique(varnames)) != length(varnames)) { + stop(SBC_error("SBC_datasets_error", "All elements of $variables must have a unique name")) } # TODO add a validate_input generic that would let backends impose additional checks # on generated data. @@ -145,24 +183,24 @@ generate_datasets.SBC_generator_function <- function(generator, n_datasets) { } } - params_rvars <- + vars_rvars <- do.call( posterior::draws_rvars, - purrr::map(generator_output$parameters, + purrr::map(generator_output$variables, ~ posterior::rvar(array(.x, dim = c(1, guess_dims(.x))), dimnames = guess_dimnames(.x)) ) ) - parameters_list[[iter]] <- posterior::as_draws_matrix(params_rvars) - if(posterior::ndraws(parameters_list[[iter]]) != 1) { - stop("The `parameters` element of the generated data must contain only + variables_list[[iter]] <- posterior::as_draws_matrix(vars_rvars) + if(posterior::ndraws(variables_list[[iter]]) != 1) { + stop("The `variables` element of the generator output must contain only a single draw") } generated[[iter]] <- generator_output$generated } - parameters <- do.call(posterior::bind_draws, args = c(parameters_list, list(along = "draw"))) + variables <- do.call(posterior::bind_draws, args = c(variables_list, list(along = "draw"))) - SBC_datasets(parameters, generated) + SBC_datasets(variables, generated) } #' Wrap a function the creates a complete dataset. 
@@ -174,13 +212,13 @@ generate_datasets.SBC_generator_function <- function(generator, n_datasets) { #' #' ```r #' gen <- SBC_generator_custom(f, <>) -#' datasets <- generate_datasets(gen, n_datasets = my_n_datasets) +#' datasets <- generate_datasets(gen, n_sims = my_n_sims) #' ``` #' #' is equivalent to just running #' #' ```r -#' datasets <- f(<>, n_datasets = my_n_datasets) +#' datasets <- f(<>, n_sims = my_n_sims) #' ``` #' #' So whenever you control the code calling `generate_datasets`, @@ -191,7 +229,7 @@ generate_datasets.SBC_generator_function <- function(generator, n_datasets) { #' built-in generators do not provide you with enough flexibility. #' #' -#' @param f function accepting at least an `n_datasets` argument and returning +#' @param f function accepting at least an `n_sims` argument and returning #' and `SBC_datasets` object #' @param ... Additional arguments passed to `f` #' @export @@ -201,8 +239,14 @@ SBC_generator_custom <- function(f, ...) { } #'@export -generate_datasets.SBC_generator_custom <- function(generator, n_datasets) { - res <- do.call(generator$f, combine_args(generator$args, list(n_datasets = n_datasets))) +generate_datasets.SBC_generator_custom <- function(generator, n_sims, n_datasets = NULL) { + if(!is.null(n_datasets)) { + warning("n_datasets argument is deprecated, use n_sims instead") + if(missing(n_sims)) { + n_sims <- n_datasets + } + } + res <- do.call(generator$f, combine_args(generator$args, list(n_sims = n_sims))) res <- validate_SBC_datasets(res) res } @@ -214,7 +258,7 @@ generate_datasets.SBC_generator_custom <- function(generator, n_datasets) { #' #' @param ... arguments passed to `brms::brm` #' @param generate_lp whether to compute the overall log-likelihood of the model -#' as an additional parameter. This can be somewhat computationally expensive, +#' as an additional variable. This can be somewhat computationally expensive, #' but improves sensitivity of the SBC process. #' @export SBC_generator_brms <- function(..., generate_lp = TRUE) { @@ -242,7 +286,14 @@ SBC_generator_brms <- function(..., generate_lp = TRUE) { } #' @export -generate_datasets.SBC_generator_brms <- function(generator, n_datasets) { +generate_datasets.SBC_generator_brms <- function(generator, n_sims, n_datasets = NULL) { + if(!is.null(n_datasets)) { + warning("n_datasets argument is deprecated, use n_sims instead") + if(missing(n_sims)) { + n_sims <- n_datasets + } + } + #TODO pass args for control, warmup, .... 
to sampling if(inherits(generator$compiled_model, "CmdStanModel")) { args_for_fitting <- translate_rstan_args_to_cmdstan(generator$args, include_unrecognized = FALSE) @@ -256,7 +307,7 @@ generate_datasets.SBC_generator_brms <- function(generator, n_datasets) { } - args_for_fitting$iter_sampling <- ceiling(n_datasets / args_for_fitting$chains) * args_for_fitting$thin + args_for_fitting$iter_sampling <- ceiling(n_sims / args_for_fitting$chains) * args_for_fitting$thin args_for_fitting prior_fit <- do.call(generator$compiled_model$sample, @@ -269,13 +320,13 @@ generate_datasets.SBC_generator_brms <- function(generator, n_datasets) { max_rhat <- max(summ$rhat) if(max_rhat > 1.01) { message("Warning: Some rhats are > 1.01 indicating the prior was not explored well.\n", - "The highest rhat is ", round(max_rhat, 2)," for ", summ$parameter[which.max(summ$rhat)], + "The highest rhat is ", round(max_rhat, 2)," for ", summ$variable[which.max(summ$rhat)], "\nConsider adding warmup iterations (via 'warmup' argument).") } min_ess <- min(summ$ess_bulk) - if(min_ess < n_datasets / 2) { - message("Warning: Bulk effective sample size for some parameters is less than half the number of datasets.\n", - "The lowest ESS_bulk/n_datasets is ", round(min_ess / n_datasets, 2)," for ", summ$parameter[which.min(summ$ess_bulk)], + if(min_ess < n_sims / 2) { + message("Warning: Bulk effective sample size for some parameters is less than half the number of simulations.\n", + "The lowest ESS_bulk/n_sims is ", round(min_ess / n_sims, 2)," for ", summ$parameter[which.min(summ$ess_bulk)], "\nConsider increased thinning (via 'thin' argument) .") } @@ -299,7 +350,7 @@ generate_datasets.SBC_generator_brms <- function(generator, n_datasets) { args_for_fitting$thin <- 1 } - args_for_fitting$iter <- args_for_fitting$warmup + ceiling(n_datasets / args_for_fitting$chains) * args_for_fitting$thin + args_for_fitting$iter <- args_for_fitting$warmup + ceiling(n_sims / args_for_fitting$chains) * args_for_fitting$thin prior_fit <- do.call(rstan::sampling, args_for_fitting) @@ -315,8 +366,8 @@ generate_datasets.SBC_generator_brms <- function(generator, n_datasets) { processed_formula <- prior_fit_brms$formula generated <- list() - log_likelihoods <- numeric(n_datasets) - for(i in 1:n_datasets) { + log_likelihoods <- numeric(n_sims) + for(i in 1:n_sims) { new_dataset <- original_data if(inherits(processed_formula, "brmsformula")) { @@ -343,8 +394,8 @@ generate_datasets.SBC_generator_brms <- function(generator, n_datasets) { # ll <- brms::log_lik(prior_fit_brms, newdata = new_dataset, subset = i, cores = 1) # sum(ll) # }, - # generated, 1:n_datasets, - # future.chunk.size = default_chunk_size(n_datasets)) + # generated, 1:n_sims, + # future.chunk.size = default_chunk_size(n_sims)) # } @@ -395,14 +446,14 @@ calculate_prior_sd <- function(datasets) { # TODO this is a hack - there has to be a better diagnostic to get whether # our sd estimate is good (probably via MCSE?) 
if(length(datasets) < 50) { - warning("Cannot reliably estimate prior_sd with less than 50 datasets.\n", - "Note that you can generate extra datasets that you don't actually fit and use those to estimate prior sd.") + warning("Cannot reliably estimate prior_sd with less than 50 simulations.\n", + "Note that you can generate extra simulations that you don't actually fit and use those to estimate prior sd.") } if(length(datasets) < 2) { - stop("Cannot estimate prior sd with less than 2 datasets") + stop("Cannot estimate prior sd with less than 2 simulations") } - sds_df <- posterior::summarise_draws(datasets$parameters, sd) + sds_df <- posterior::summarise_draws(datasets$variables, sd) sds_vec <- sds_df$sd names(sds_vec) <- sds_df$variable diff --git a/R/example.R b/R/example.R new file mode 100644 index 0000000..8d59547 --- /dev/null +++ b/R/example.R @@ -0,0 +1,199 @@ +#' Construct a generator used in the examples. +#' +#' @param example name of example +#' @param N size of the dataset the generator should simulate +#' @return an object that can be passed to [generate_datasets()] +#' @export +SBC_example_generator <- function(example = c("normal"), N = 100) { + example <- match.arg(example) + if(example == "normal") { + generator_func <- function(N) { + mu <- rnorm(1, 0, 1); + sigma <- abs(rnorm(1, 0, 1)) + y <- rnorm(N, mu, sigma) + list( + variables = list( + mu = mu, + sigma = sigma + ), + generated = list( + N = N, + y = y + ) + ) + } + } else { + stop("Invalid dataset example") + } + + SBC_generator_function(generator_func, N = N) +} + +#' Print the Stan code of a model used in the examples. +#' +#' @param example name of the example model. +#' @export +SBC_print_example_model <- function(example = c("normal_sd", "normal_bad"), + interface = c("rstan", "cmdstanr", "rjags")) { + #Backward compatibility + if(identical(example, "normal_var")) { + example <- "normal_bad" + } + + example <- match.arg(example) + interface <- match.arg(interface) + + if(interface %in% c("rstan", "cmdstanr")) { + example_program <- paste0(example, ".stan") + } else if(interface == "rjags") { + example_program <- paste0(example, ".jags") + } + code <- readLines(system.file(example_program, package = "SBC")) + cat(code, sep = "\n") +} + + +#' Construct a backend to be used in the examples. +#' +#' Note that this will involve compiling a Stan model and may take a while. +#' +#' @param example name of the example model. `normal_sd` is a simple model fitting +#' a normal distribution parametrized as mean and standard deviation. +#' `normal_bad` is a model that _tries_ to implement the `normal_sd` model, +#' but assumes an incorrect parametrization of the normal distribution. +#' For Stan-based backends, the model is written as if Stan parametrized +#' normal distribution with precision (while Stan uses sd), for JAGS-based +#' backends the model is written as if JAGS parametrized normal distribution +#' with sd (while JAGS uses precision). 
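+#' @examples
+#' \dontrun{
+#' # A rough usage sketch (compiling the Stan model may take a while)
+#' backend <- SBC_example_backend("normal_sd", interface = "cmdstanr")
+#' }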
+#' @param interface name of the interface to be used to fit the model +#' @export +SBC_example_backend <- function(example = c("normal_sd", "normal_bad"), + interface = c("rstan", "cmdstanr", "rjags")) { + + #Backward compatibility + if(identical(example, "normal_var")) { + example <- "normal_bad" + } + + example <- match.arg(example) + interface <- match.arg(interface) + + if(interface %in% c("cmdstanr", "rstan")) { + example_program <- paste0(example, ".stan") + + tmp <- file.path(tempdir(), example_program) + if (!file.exists(tmp)) { + file.copy(system.file(example_program, package = "SBC"), tmp) + } + + if(interface == "cmdstanr") { + mod <- cmdstanr::cmdstan_model(tmp) + SBC_backend_cmdstan_sample(mod, chains = 2, iter_warmup = 400) + } else if(interface == "rstan") { + mod <- rstan::stan_model(tmp) + SBC_backend_rstan_sample(mod, chains = 2, iter = 1400, warmup = 400) + } + } else if(interface == "rjags") { + model_file <- system.file(paste0(example, ".jags"), package = "SBC") + SBC_backend_rjags(file = model_file, n.iter = 5000, n.burnin = 5000, + thin = 10, n.chains = 2, + variable.names = c("mu", "sigma")) + } else { + stop("Invalid interface") + } +} + +#' Combine an example backend with an example generator to provide full +#' results that can be used to test other functions in the package. +#' +#' Except for `example = "visualizations"`, all examples will actually +#' compile and fit Stan models and thus may take a while to complete. +#' +#' @param example - name of the example. `normal_ok` is an example +#' where the generator matches the model +#' (using the `normal` generator and `normal_sd` backend), while +#' `normal_bad` is an example with a mismatch between the generator and backend +#' that manifests in SBC (`normal_bad` combines the `normal` generator with +#' `normal_bad` backend). `visualizations` creates a purely artificial results +#' that are meant to showcase the built-in plots (the `interface` parameter will +#' be ignored). 
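+#' @examples
+#' \dontrun{
+#' # A rough usage sketch; this compiles a Stan model and runs n_sims fits,
+#' # so it may take a while
+#' results <- SBC_example_results("normal_ok", interface = "cmdstanr")
+#' plot_rank_hist(results)
+#' }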
+#' @param interface name of the interface to be used for the backend +#' @param N number of datapoints to simulate from the generator for each simulation +#' @param n_sims number of simulations to perform +#' @export +SBC_example_results <- function(example = c("normal_ok", "normal_bad", "visualizations"), + interface = c("rstan", "cmdstanr", "rjags"), + N = 100, n_sims = 50) { + example <- match.arg(example) + interface <- match.arg(interface) + if(example == "normal_ok") { + generator <- SBC_example_generator(example = "normal", N = N) + backend <- SBC_example_backend(example = "normal_sd", interface = interface) + } else if (example == "normal_bad") { + generator <- SBC_example_generator(example = "normal", N = N) + backend <- SBC_example_backend(example = "normal_bad", interface = interface) + } else if (example == "visualizations") { + + df_x <- seq(-4, 4, length.out = 400) + prior_df <- tidyr::crossing(data.frame(x = df_x, density = dnorm(df_x), type = "Prior"), + variable = c("Exact match", + "Model too certain", + "Model too uncertain", + "Model underestimating", + "Model overestimating", + "Some extra-low estimates")) + + generator <- SBC_generator_function(function() { + list( + variables = list( + "Exact match" = rnorm(1), + "Model too certain" = rnorm(1), + "Model too uncertain" = rnorm(1), + "Model underestimating" = rnorm(1), + "Model overestimating" = rnorm(1), + "Some extra-low estimates" = rnorm(1) + ), + generated = list() + ) + }) + + posterior_df <- rbind( + data.frame(variable = "Exact match", x = df_x, density = dnorm(df_x)), + data.frame(variable = "Model too certain", x = df_x, density = dnorm(df_x, sd = 1/3)), + data.frame(variable = "Model too uncertain", x = df_x, density = dnorm(df_x, sd = 2)), + data.frame(variable = "Model underestimating", x = df_x, density = dnorm(df_x, mean = -1)), + data.frame(variable = "Model overestimating", x = df_x, density = dnorm(df_x, mean = 1)), + data.frame(variable = "Some extra-low estimates", x = df_x, + density = 0.1 * dnorm(df_x, mean = -3, sd = 0.1) + 0.9 * dnorm(df_x)) + ) + posterior_df$type = "Data-averaged posterior" + + backend <- SBC_backend_mock_rng( + "Exact match" = ~ rnorm(.), + "Model too certain" = ~ rnorm(., sd = 1/3), + "Model too uncertain" = ~ rnorm(., sd = 2), + "Model underestimating" = ~ rnorm(., mean = -1), + "Model overestimating" = ~ rnorm(., mean = 1), + "Some extra-low estimates" = function(.) { if(runif(1) < 0.1) { rnorm(., mean = -3, sd = 0.1) } else { rnorm(.) 
}},
+      n_draws = 100
+    )
+
+    res <- compute_SBC(
+      generate_datasets(generator, n_sims = n_sims),
+      backend
+    )
+
+    attr(res, "density_df") <- rbind(prior_df, posterior_df)
+
+    return(res)
+  } else {
+    stop("Invalid example")
+  }
+
+
+
+  compute_SBC(
+    generate_datasets(generator, n_sims = n_sims),
+    backend
+  )
+}
diff --git a/R/metric.R b/R/metric.R
index 241de9f..32738aa 100755
--- a/R/metric.R
+++ b/R/metric.R
@@ -1,4 +1,4 @@
-#' Distance between binned samples (rank for SBC) and discrete uniform
+#' Distance between binned draws (rank for SBC) and discrete uniform
 #'
 #' @param ranks array of dimension (n_iter, n_pars) where n_iter=number of posterior draw iterations, n_pars the number of parameters of interest
 #' @param par names of parameter to plot
@@ -28,10 +28,10 @@ rank2unif <- function(results, par, bins = 20){
   return(list(par = par, rank_list = rank_list))
 }
 
-#' Summarize relational property of overall prior and posterior samples
+#' Summarize relational property of overall prior and posterior draws
 #'
-#' @param priors A posterior::draws_rvars of dimension(n_iterations=1, n_chains=n_sbc_iterations, n_variables=n_variables) which stores prior samples
-#' @param posteriors A posterior::draws_Rvars of dimension(n_iterations=n_posterior_samples, n_chains=n_sbc_iterations, n_variables=n_variables), which stores fitted posterior samples
+#' @param priors A posterior::draws_rvars of dimension(n_iterations=1, n_chains=n_sbc_iterations, n_variables=n_variables) which stores prior draws
+#' @param posteriors A posterior::draws_rvars of dimension(n_iterations=n_posterior_draws, n_chains=n_sbc_iterations, n_variables=n_variables), which stores fitted posterior draws
 #' @param par names of parameter to summarize
 #' @param bins number of bins for prior and post density
 #' @export
@@ -68,7 +68,7 @@ max_diff <- function(x, y){
 # TODO need testing
 wasserstein <- function(x, y){
   tempf <- Vectorize(function(i) abs((x[i]/sum(x) - y[i]/sum(y)))) # expected sums = 1
-  val <- integrate(tempf,1,bins, rel.tol=.Machine$double.eps^.05)$value
+  val <- integrate(tempf,1,5, rel.tol=.Machine$double.eps^.05)$value
   return(val)
 }
 # wasserstein <- function(x, y, bin_count){
@@ -97,8 +97,8 @@ wasserstein <- function(x, y){
 ##'
 ##' This has an upper bound of \eqn{\sqrt \sum (P(x) + Q(x))}
 ##'
-##' @param x numeric vector of samples from first distribution
-##' @param y numeric vector of samples from second distribution
+##' @param x numeric vector of draws from first distribution
+##' @param y numeric vector of draws from second distribution
 ##' @param x_weights numeric vector of weights of first distribution
 ##' @param y_weights numeric vector of weights of second distribution
 ##' @param ... unused
@@ -110,8 +110,14 @@ wasserstein <- function(x, y){
 ##' Notes in Computer Science, vol 9285. Springer, Cham.
 ##' \code{doi:10.1007/978-3-319-23525-7_11}
 ##' @export
 cjs_dist <- function(x, y, x_weights = rep(1/length(x), length(x)), y_weights = rep(1/length(y), length(y)), ...) {
-
+  # rvar inputs are flattened to plain vectors of draws and weighted uniformly
+  if (inherits(x, "rvar")) {
+    x <- c(posterior::draws_of(x))
+    y <- c(posterior::draws_of(y))
+    x_weights <- rep(1/length(x), length(x))
+    y_weights <- rep(1/length(y), length(y))
+  }
   # sort draws and weights
   x_idx <- order(x)
   x <- x[x_idx]
diff --git a/R/plot.R b/R/plot.R
index ca6ae10..5cbc7e9 100755
--- a/R/plot.R
+++ b/R/plot.R
@@ -1,22 +1,52 @@
 #' Plot rank histogram of an SBC results.
#' +#' The expected uniform distribution and an approximate confidence interval +#' is also shown. The confidence interval cannot be taken too seriously +#' as it is derived assuming the bins are independent (which they are not). +#' The [plot_ecdf()] and [plot_ecdf_diff()] plots provide better confidence interval +#' but are somewhat less interpretable. See `vignette("rank_visualizations")` for +#' more details. +#' #' By default the support is for `SBC_results` objects and data frames in the same #' format as the `$stats` element of `SBC_results`. +#' #' @param x Object supporting the plotting method. +#' @param variables Names of variables to show +#' @param bins number of bins to be used in the histogram, if left unspecified, +#' it is determined by [guess_rank_hist_bins()]. +#' @param prob The width of the approximate confidence interval shown. #' @export -plot_rank_hist <- function(x, parameters = NULL, bins = NULL, prob = 0.95, ...) { +plot_rank_hist <- function(x, variables = NULL, bins = NULL, prob = 0.95, ..., parameters = NULL) { UseMethod("plot_rank_hist") } #' @export #' @import ggplot2 -plot_rank_hist.data.frame <- function(x, parameters = NULL, bins = NULL, prob = 0.95, max_rank = x$max_rank) { - if(!all(c("parameter", "rank") %in% names(x))) { - stop("The data.frame needs a 'parameter' and 'rank' columns") +plot_rank_hist.data.frame <- function(x, variables = NULL, bins = NULL, prob = 0.95, max_rank = x$max_rank, parameters = NULL) { + # Ensuring backwards compatibility + if("parameter" %in% names(x)) { + if(!("variable" %in% names(x))) { + warning("The x parameter contains a `parameter` column, which is deprecated, use `variable` instead.") + x$variable <- x$parameter + } + } + + + if(!is.null(parameters)) { + warning("The `parameters` argument is deprecated use `variables` instead.") + if(is.null(variables)) { + variables <- parameters + } } - n_simulations <- dplyr::summarise(dplyr::group_by(x, parameter), count = dplyr::n())$count - if(length(unique(n_simulations)) > 1) { - stop("Differing number of SBC steps per parameter not supported.") + + + + if(!all(c("variable", "rank") %in% names(x))) { + stop("The data.frame needs a 'variable' and 'rank' columns") + } + n_sims <- dplyr::summarise(dplyr::group_by(x, variable), count = dplyr::n())$count + if(length(unique(n_sims)) > 1) { + stop("Differing number of SBC steps per variable not supported.") } if(is.null(max_rank)) { @@ -24,23 +54,23 @@ plot_rank_hist.data.frame <- function(x, parameters = NULL, bins = NULL, prob = } max_rank <- unique(max_rank) if(length(max_rank) > 1) { - stop("Differing max_rank across parameters is not supported yet.") + stop("Differing max_rank across variables is not supported yet.") } - n_simulations <- unique(n_simulations) + n_sims <- unique(n_sims) if(is.null(bins)){ - bins <- guess_bins(max_rank, n_simulations) + bins <- guess_rank_hist_bins(max_rank, n_sims) } else if(bins > max_rank + 1) { stop("Cannot use more bins than max_rank + 1") } - if(!is.null(parameters)) { - x <- dplyr::filter(x, parameter %in% parameters) + if(!is.null(variables)) { + x <- dplyr::filter(x, variable %in% variables) } if(nrow(x) == 0) { - stop("No data for the selected parameters.") + stop("No data for the selected variables.") } #CI - taken from https://github.com/seantalts/simulation-based-calibration/blob/master/Rsbc/generate_plots_sbc_inla.R @@ -50,9 +80,9 @@ plot_rank_hist.data.frame <- function(x, parameters = NULL, bins = NULL, prob = # i.e. 
includes lower quantile of smalelr bins and higher quantile of larger bins larger_bin_size <- ceiling(((max_rank + 1) / bins)) smaller_bin_size <- floor(((max_rank + 1) / bins)) - ci_lower = qbinom(0.5 * (1 - prob), size=n_simulations,prob = smaller_bin_size / max_rank) - ci_mean = qbinom(0.5, size=n_simulations,prob = 1 / bins) - ci_upper = qbinom(0.5 * (1 + prob), size=n_simulations,prob = larger_bin_size / max_rank) + ci_lower = qbinom(0.5 * (1 - prob), size=n_sims,prob = smaller_bin_size / max_rank) + ci_mean = qbinom(0.5, size=n_sims,prob = 1 / bins) + ci_upper = qbinom(0.5 * (1 + prob), size=n_sims,prob = larger_bin_size / max_rank) CI_polygon_x <- c(-0.1*max_rank,0,-0.1*max_rank,1.1 * max_rank,max_rank,1.1 * max_rank,-0.1 * max_rank) CI_polygon_y <- c(ci_lower,ci_mean,ci_upper,ci_upper,ci_mean,ci_lower,ci_lower) @@ -63,39 +93,49 @@ plot_rank_hist.data.frame <- function(x, parameters = NULL, bins = NULL, prob = geom_polygon(data=data.frame(x= CI_polygon_x,y= CI_polygon_y),aes(x=x,y=y),fill="skyblue",color="skyblue1",alpha=0.33) + geom_histogram(breaks = seq(0, max_rank, length.out = bins + 1), closed = "left" ,fill="#808080",colour="black") + scale_y_continuous("count") + - facet_wrap(~parameter, scales = "free_y") + facet_wrap(~variable, scales = "free_y") } #' @export -plot_rank_hist.SBC_results <- function(x, parameters = NULL, bins = NULL, prob = 0.95) { +plot_rank_hist.SBC_results <- function(x, variables = NULL, bins = NULL, prob = 0.95, parameters = NULL) { x <- validate_SBC_results(x) + + if(!is.null(parameters)) { + warning("The `parameters` argument is deprecated use `variables` instead.") + if(is.null(variables)) { + variables <- parameters + } + } + max_rank <- unique(x$stats$max_rank) if(length(max_rank) > 1) { - stop("Differing max_rank across parameters not supported yet.") + stop("Differing max_rank across variables not supported yet.") } - plot_rank_hist(x$stats, parameters = parameters, bins = bins, prob = prob, max_rank = max_rank) + plot_rank_hist(x$stats, variables = variables, bins = bins, prob = prob, max_rank = max_rank) } #' Guess the number of bins for [plot_rank_hist()]. #' @param N the number of ranks observed #' @param max_rank the maximum rank observed -guess_bins <- function(max_rank, N) { +guess_rank_hist_bins <- function(max_rank, N) { min(max_rank + 1, max(floor(N / 10), 5)) } #' Plot the ECDF-based plots. #' #' +#' See `vignette("rank_visualizations")` for +#' more details. #' See the methods for [data_for_ecdf_plots()] for available data formats. #' #' \href{https://arxiv.org/abs/1903.08008}{arxiv::1903.08008} by A. Vehtari et al. #' @export #' @rdname ECDF-plots #' @param x object supporting the [data_for_ecdf_plots()] method. -#' @param parameters optional subset of parameters to show in the plot +#' @param variables optional subset of variables to show in the plot #' @param gamma TODO #' @param prob the width of the plotted confidence interval for the ECDF. #' @param size size passed to [ggplot2::geom_ribbon()] for the confidence band @@ -103,21 +143,31 @@ guess_bins <- function(max_rank, N) { #' @param K number of uniformly spaced evaluation points for the ECDF or ECDFs. Affects #' the granularity of the plot and can significantly speed up the computation #' of the simultaneous confidence bands. Defaults to the smaller of number of -#' ranks per parameter and the maximum rank. +#' ranks per variable and the maximum rank. #' @param ... additional arguments passed to [data_for_ecdf_plots()]. 
#' Most notably, if `x` is matrix, a `max_rank` parameter needs to be given. +#' @param parameters DEPRECATED, use `variables` instead. #' @import ggplot2 #' @seealso [plot_coverage()] plot_ecdf <- function(x, - parameters = NULL, + variables = NULL, K = NULL, gamma = NULL, prob = 0.95, size = 1, - alpha = 0.33, ...) { + alpha = 0.33, + ..., + parameters = NULL) { + + if(!is.null(parameters)) { + warning("The `parameters` argument is deprecated use `variables` instead.") + if(is.null(variables)) { + variables <- parameters + } + } ecdf_data <- - data_for_ecdf_plots(x, parameters = parameters, + data_for_ecdf_plots(x, variables = variables, prob = prob, K = K, gamma = gamma, ...) N <- ecdf_data$N @@ -165,21 +215,30 @@ plot_ecdf <- function(x, ) + xlab(NULL) + ylab(NULL) + - facet_wrap(~ parameter) + facet_wrap(~ variable) } #' @export #' @rdname ECDF-plots #' @import ggplot2 plot_ecdf_diff <- function(x, - parameters = NULL, + variables = NULL, K = NULL, gamma = NULL, prob = 0.95, size = 1, - alpha = 0.33, ...) { + alpha = 0.33, + ..., + parameters = NULL) { + if(!is.null(parameters)) { + warning("The `parameters` argument is deprecated use `variables` instead.") + if(is.null(variables)) { + variables <- parameters + } + } + ecdf_data <- - data_for_ecdf_plots(x, parameters = parameters, + data_for_ecdf_plots(x, variables = variables, prob = prob, K = K, gamma = gamma, ...) N <- ecdf_data$N @@ -224,7 +283,7 @@ plot_ecdf_diff <- function(x, ) + xlab(NULL) + ylab(NULL) + - facet_wrap(~ parameter) + facet_wrap(~ variable, scales = "free_y") } @@ -240,22 +299,59 @@ data_for_ecdf_plots <- function(x, ..., } -data_for_ecdf_plots.SBC_results <- function(x, parameters = NULL, +data_for_ecdf_plots.SBC_results <- function(x, variables = NULL, prob = 0.95, gamma = NULL, - K = NULL) { - data_for_ecdf_plots(x$stats, parameters = parameters, prob = prob, gamma = gamma, K = K) + K = NULL, + parameters = NULL) { + if(!is.null(parameters)) { + warning("The `parameters` argument is deprecated use `variables` instead.") + if(is.null(variables)) { + variables <- parameters + } + } + + data_for_ecdf_plots(x$stats, variables = variables, prob = prob, gamma = gamma, K = K) } -data_for_ecdf_plots.data.frame <- function(x, parameters = NULL, +data_for_ecdf_plots.data.frame <- function(x, variables = NULL, prob = 0.95, gamma = NULL, K = NULL, - max_rank = x$max_rank) { - stats <- x + max_rank = x$max_rank, + parameters = NULL) { + if(!is.null(parameters)) { - stats <- dplyr::filter(stats, parameter %in% parameters) + warning("The `parameters` argument is deprecated use `variables` instead.") + if(is.null(variables)) { + variables <- parameters + } + } + + if("parameter" %in% names(x)) { + if(!("variable" %in% names(x))) { + warning("The x parameter contains a `parameter` column, which is deprecated, use `variable` instead.") + x$variable <- x$parameter + } + } + + if("dataset_id" %in% names(x)) { + if(!("sim_id" %in% names(x))) { + warning("The x parameter contains a `dataset_id` column, which is deprecated, use `sim_id` instead.") + x$sim_id <- x$dataset_id + } + } + + + if(!all(c("variable", "rank", "sim_id") %in% names(x))) { + stop(SBC_error("SBC_invalid_argument_error", + "The stats data.frame needs a 'variable', 'rank' and 'sim_id' columns")) + } + + stats <- x + if(!is.null(variables)) { + stats <- dplyr::filter(stats, variable %in% variables) } if(is.null(max_rank)) { @@ -263,18 +359,18 @@ data_for_ecdf_plots.data.frame <- function(x, parameters = NULL, } max_rank <- unique(max_rank) if(length(max_rank) > 1) 
{ - stop("Differing max_rank across parameters is not supported yet.") + stop("Differing max_rank across variables is not supported yet.") } - summ <- dplyr::summarise(dplyr::group_by(stats, parameter), count = dplyr::n(), .groups = "drop") + summ <- dplyr::summarise(dplyr::group_by(stats, variable), count = dplyr::n(), .groups = "drop") if(length(unique(summ$count)) > 1) { stop("Not all variables have the same number of simulations.") } - rank <- dplyr::select(stats, dataset_id, parameter, rank) - rank_matrix <- tidyr::pivot_wider(rank, names_from = "parameter", + rank <- dplyr::select(stats, sim_id, variable, rank) + rank_matrix <- tidyr::pivot_wider(rank, names_from = "variable", values_from = "rank") - rank_matrix <- as.matrix(dplyr::select(rank_matrix, -dataset_id)) + rank_matrix <- as.matrix(dplyr::select(rank_matrix, -sim_id)) data_for_ecdf_plots(rank_matrix, max_rank = max_rank, prob = prob, @@ -282,20 +378,29 @@ data_for_ecdf_plots.data.frame <- function(x, parameters = NULL, } data_for_ecdf_plots.matrix <- function(x, - max_rank, - parameters = NULL, - prob = 0.95, - gamma = NULL, - K = NULL, - size = 1, - alpha = 0.33) { + max_rank, + variables = NULL, + prob = 0.95, + gamma = NULL, + K = NULL, + size = 1, + alpha = 0.33, + parameters = NULL) { + + if(!is.null(parameters)) { + warning("The `parameters` argument is deprecated use `variables` instead.") + if(is.null(variables)) { + variables <- parameters + } + } + ranks_matrix <- x if(any(!is.finite(ranks_matrix))) { stop("Ranks may only contain finite values") } - if(!is.null(parameters)) { - ranks_matrix <- ranks_matrix[, parameters] + if(!is.null(variables)) { + ranks_matrix <- ranks_matrix[, variables] } pit <- ranks_to_empirical_pit(ranks_matrix, max_rank) @@ -322,7 +427,7 @@ data_for_ecdf_plots.matrix <- function(x, ecdf_df <- as.data.frame(ecdf_vals) ecdf_df$..z <- z - ecdf_df <- tidyr::pivot_longer(ecdf_df, -..z, names_to = "parameter", values_to = "ecdf") + ecdf_df <- tidyr::pivot_longer(ecdf_df, -..z, names_to = "variable", values_to = "ecdf") ecdf_df <- dplyr::rename(ecdf_df, z = ..z) structure(list(limits_df = limits_df, ecdf_df = ecdf_df, K = K, N = N, z = z), @@ -337,55 +442,78 @@ data_for_ecdf_plots.matrix <- function(x, #' [Towards A Principled Bayesian Workflow](https://betanalpha.github.io/assets/case_studies/principled_bayesian_workflow.html#132_A_Bayesian_Eye_Chart). #' #' @param x object containing results (a data.frame or [SBC_results()] object). -#' @param prior_sd a named vector of prior standard deviations for your parameters. +#' @param prior_sd a named vector of prior standard deviations for your variables. #' Either pass in analytically obtained values or use [calculate_prior_sd()] to get an empirical estimate from #' an `SBC_datasets` object. -#' @param parameters parameters to show in the plot or `NULL` to show all +#' @param variables variables to show in the plot or `NULL` to show all #' must correspond a field already computed in the results (most likely `"mean"` and `"median"`). #' @param scale which scale of variability you want to see - either `"sd"` for standard deviation #' or `"var"` for variance. #' @param alpha the alpha for the points +#' @param parameters DEPRECATED, use `variables` instead. 
#' @return a ggplot2 plot object #' @export -plot_contraction <- function(x, prior_sd, parameters = NULL, scale = "sd", alpha = 0.8) { +plot_contraction <- function(x, prior_sd, variables = NULL, scale = "sd", alpha = 0.8, parameters = NULL) { UseMethod("plot_contraction") } #' @export -plot_contraction.SBC_results <- function(x, prior_sd, parameters = NULL, scale = "sd", alpha = 0.8) { - plot_contraction(x$stats, prior_sd = prior_sd, parameters = parameters, alpha = alpha) +plot_contraction.SBC_results <- function(x, prior_sd, variables = NULL, scale = "sd", alpha = 0.8, parameters = NULL) { + if(!is.null(parameters)) { + warning("The `parameters` argument is deprecated use `variables` instead.") + if(is.null(variables)) { + variables <- parameters + } + } + + plot_contraction(x$stats, prior_sd = prior_sd, variables = variables, alpha = alpha) } #' @export -plot_contraction.data.frame <- function(x, prior_sd, parameters = NULL, scale = "sd", alpha = 0.8) { - if(!all(c("parameter", "sd") %in% names(x))) { - stop("The data.frame needs a 'parameter' and 'sd' columns") +plot_contraction.data.frame <- function(x, prior_sd, variables = NULL, scale = "sd", alpha = 0.8, parameters = NULL) { + if(!is.null(parameters)) { + warning("The `parameters` argument is deprecated use `variables` instead.") + if(is.null(variables)) { + variables <- parameters + } + } + + # Ensuring backwards compatibility + if("parameter" %in% names(x)) { + if(!("variable" %in% names(x))) { + warning("The x parameter contains a `parameter` column, which is deprecated, use `variable` instead.") + x$variable <- x$parameter + } + } + + if(!all(c("variable", "sd") %in% names(x))) { + stop("The data.frame needs a 'variable' and 'sd' columns") } if(!is.numeric(prior_sd) || is.null(names(prior_sd))) { stop("prior_sd has to be a named vector") } - if(!is.null(parameters)) { - prior_sd <- prior_sd[names(prior_sd) %in% parameters] - x <- dplyr::filter(x, parameter %in% parameters) + if(!is.null(variables)) { + prior_sd <- prior_sd[names(prior_sd) %in% variables] + x <- dplyr::filter(x, variable %in% variables) } if(nrow(x) == 0 || length(prior_sd) == 0) { stop("No data to plot.") } - shared_params <- intersect(unique(x$parameter), names(prior_sd)) - if(length(shared_params) < length(unique(x$parameter))) { - warning("Some parameters do not have prior_sd in the data: ", setdiff(unique(x$parameter), shared_params)) + shared_vars <- intersect(unique(x$variable), names(prior_sd)) + if(length(shared_vars) < length(unique(x$variable))) { + warning("Some variables do not have prior_sd in the data: ", setdiff(unique(x$variable), shared_vars)) } - if(length(shared_params) < length(prior_sd)) { - warning("Some prior_sd values do not have counterpart in the data: ", setdiff(names(prior_sd), shared_params)) + if(length(shared_vars) < length(prior_sd)) { + warning("Some prior_sd values do not have counterpart in the data: ", setdiff(names(prior_sd), shared_vars)) } - x <- dplyr::filter(x, parameter %in% shared_params) + x <- dplyr::filter(x, variable %in% shared_vars) - x$prior_sd <- prior_sd[x$parameter] + x$prior_sd <- prior_sd[x$variable] if(scale == "sd") { x <- dplyr::mutate(x, contraction = 1 - sd / prior_sd) } else if(scale == "var") { @@ -394,49 +522,74 @@ plot_contraction.data.frame <- function(x, prior_sd, parameters = NULL, scale = ggplot2::ggplot(x, aes(x = contraction, y = z_score)) + geom_point(alpha = alpha) + expand_limits(x = c(0,1)) + - facet_wrap(~parameter) + facet_wrap(~variable) } #' Plot the simulated "true" values versus 
posterior estimates #' #' @param x object containing results (a data.frame or [SBC_results()] object). -#' @param parameters parameters to show in the plot or `NULL` to show all +#' @param variables variables to show in the plot or `NULL` to show all #' @param estimate which estimate to use for the central tendency, #' must correspond a field already computed in the results (most likely `"mean"` and `"median"`). #' @param uncertainty which estimates to use for uncertainty (a character vector of length 2) #' must correspond a field already computed in the results. Pass `NULL` to avoid showing uncertainty at all. #' @param alpha the alpha for the points and uncertainty intervals +#' @param parameters DEPRECATED, use `variables` instead #' @return a ggplot2 plot object #' @export -plot_sim_estimated <- function(x, parameters = NULL, estimate = "mean", +plot_sim_estimated <- function(x, variables = NULL, estimate = "mean", uncertainty = c("q5", "q95"), - alpha = NULL) { + alpha = NULL, parameters = NULL) { UseMethod("plot_sim_estimated") } #' @export -plot_sim_estimated.SBC_results <- function(x, parameters = NULL, estimate = "mean", +plot_sim_estimated.SBC_results <- function(x, variables = NULL, estimate = "mean", uncertainty = c("q5", "q95"), - alpha = NULL) { - plot_sim_estimated(x$stats, parameters = parameters, estimate = estimate, + alpha = NULL, parameters = NULL) { + if(!is.null(parameters)) { + warning("The `parameters` argument is deprecated use `variables` instead.") + if(is.null(variables)) { + variables <- parameters + } + } + + plot_sim_estimated(x$stats, variables = variables, estimate = estimate, uncertainty = uncertainty, alpha = alpha) } #' @export -plot_sim_estimated.data.frame <- function(x, parameters = NULL, estimate = "mean", +plot_sim_estimated.data.frame <- function(x, variables = NULL, estimate = "mean", uncertainty = c("q5", "q95"), - alpha = NULL) { - if(!all(c("parameter", estimate, uncertainty) %in% names(x))) { - stop("The data.frame needs a 'parameter' and '", estimate, "' columns") - } + alpha = NULL, parameters = NULL) { if(!is.null(parameters)) { - x <- dplyr::filter(x, parameter %in% parameters) + warning("The `parameters` argument is deprecated use `variables` instead.") + if(is.null(variables)) { + variables <- parameters + } + } + + # Ensuring backwards compatibility + if("parameter" %in% names(x)) { + if(!("variable" %in% names(x))) { + warning("The x parameter contains a `parameter` column, which is deprecated, use `variable` instead.") + x$variable <- x$parameter + } + } + + required_columns <- c("variable", estimate, uncertainty) + if(!all(required_columns %in% names(x))) { + stop("The data.frame needs to have the following columns: ", paste0("'", required_columns, "'", collapse = ", ")) + } + + if(!is.null(variables)) { + x <- dplyr::filter(x, variable %in% variables) } if(is.null(alpha)) { - n_points <- dplyr::summarise(dplyr::group_by(x, parameter), count = dplyr::n()) + n_points <- dplyr::summarise(dplyr::group_by(x, variable), count = dplyr::n()) max_points <- max(n_points$count) alpha_guess <- 1 / ((max_points * 0.06) + 1) alpha <- max(0.05, alpha_guess) @@ -465,43 +618,72 @@ plot_sim_estimated.data.frame <- function(x, parameters = NULL, estimate = "mean geom_abline(intercept = 0, slope = 1, color = "skyblue1", size = 2) + main_geom + scale_y_continuous(estimate) + - facet_wrap(~parameter, scales = "free") + facet_wrap(~variable, scales = "free") } -#' Plot the observed coverage and its uncertainty +#' Plot the observed coverage and its 
uncertainty. #' +#' `plot_coverage` will plot the observed coverage, +#' while `plot_coverage_diff` will show the difference between observed +#' and expected coverage. #' Please refer to [empirical_coverage()] for details on computation #' and limitations of this plot as well as details on the arguments. +#' See `vignette("rank_visualizations")` for +#' more details. #' #' @param x object containing results (a data.frame or [SBC_results()] object). -#' @param parameters parameters to show in the plot or `NULL` to show all +#' @param variables variables to show in the plot or `NULL` to show all #' @param prob the width of the uncertainty interval to be shown +#' @param parameters DEPRECATED. Use `variables` instead. #' @return a ggplot2 plot object +#' @seealso empirical_coverage #' @export -plot_coverage <- function(x, parameters = NULL, prob = 0.95, - interval_type = "central") { +plot_coverage <- function(x, variables = NULL, prob = 0.95, + interval_type = "central", parameters = NULL) { UseMethod("plot_coverage") } -#' @rdname plot_coverage #' @export -plot_coverage.SBC_results <- function(x, parameters = NULL, prob = 0.95, - interval_type = "central") { - plot_coverage(x$stats, parameters = parameters, prob = prob, interval_type = interval_type) +plot_coverage.SBC_results <- function(x, variables = NULL, prob = 0.95, + interval_type = "central", parameters = NULL) { + + if(!is.null(parameters)) { + warning("The `parameters` argument is deprecated, use `variables` instead.") + if(is.null(variables)) { + variables <- parameters + } + } + + plot_coverage(x$stats, variables = variables, prob = prob, interval_type = interval_type) } -#' @rdname plot_coverage #' @export -plot_coverage.data.frame <- function(x, parameters = NULL, prob = 0.95, - interval_type = "central") { - if(!all(c("parameter", "rank", "max_rank") %in% names(x))) { +plot_coverage.data.frame <- function(x, variables = NULL, prob = 0.95, + interval_type = "central", parameters = NULL) { + + # Ensuring backwards compatibility + if("parameter" %in% names(x)) { + if(!("variable" %in% names(x))) { + warning("The x parameter contains a `parameter` column, which is deprecated, use `variable` instead.") + x$variable <- x$parameter + } + } + + if(!all(c("variable", "rank", "max_rank") %in% names(x))) { stop(SBC_error("SBC_invalid_argument_error", - "The stats data.frame needs a 'parameter', 'rank' and 'max_rank' columns")) + "The stats data.frame needs a 'variable', 'rank' and 'max_rank' columns")) } if(!is.null(parameters)) { - x <- dplyr::filter(x, parameter %in% parameters) + warning("The `parameters` argument is deprecated, use `variables` instead.") + if(is.null(variables)) { + variables <- parameters + } + } + + if(!is.null(variables)) { + x <- dplyr::filter(x, variable %in% variables) } max_max_rank <- max(x$max_rank) @@ -512,11 +694,56 @@ plot_coverage.data.frame <- function(x, parameters = NULL, prob = 0.95, ymin = ci_low, ymax = ci_high)) + geom_ribbon(fill = "black", alpha = 0.33) + geom_segment(x = 0, y = 0, xend = 1, yend = 1, color = "skyblue1", size = 2) + - #geom_abline(intercept = 0, slope = 1, color = "skyblue1", size = 2) + geom_line() + - scale_x_continuous(paste0(interval_type, " interval width")) + - scale_y_continuous("Observed coverage") + - facet_wrap(~parameter) + scale_x_continuous(paste0(interval_type, " interval width"), + labels = scales::percent) + + scale_y_continuous("Observed coverage", labels = scales::percent) + + facet_wrap(~variable) +} + + +#' @rdname plot_coverage +#' @export +plot_coverage_diff 
<- function(x, variables = NULL, prob = 0.95, + interval_type = "central", parameters = NULL) { + UseMethod("plot_coverage_diff") +} + +#' @export +plot_coverage_diff.SBC_results <- function(x, variables = NULL, prob = 0.95, + interval_type = "central") { + plot_coverage_diff(x$stats, variables = variables, prob = prob, interval_type = interval_type) +} + +#' @export +plot_coverage_diff.data.frame <- function(x, variables = NULL, prob = 0.95, + interval_type = "central", parameters = NULL) { + if(!all(c("variable", "rank", "max_rank") %in% names(x))) { + stop(SBC_error("SBC_invalid_argument_error", + "The stats data.frame needs a 'variable', 'rank' and 'max_rank' columns")) + } + if(!is.null(variables)) { + x <- dplyr::filter(x, variable %in% variables) + } + + max_max_rank <- max(x$max_rank) + coverage <- empirical_coverage(x, (0:max_max_rank) / (max_max_rank + 1), prob = prob, + interval_type = interval_type) + coverage <- dplyr::mutate(coverage, + diff = estimate - width_represented, + diff_low = ci_low - width_represented, + diff_high = ci_high - width_represented + ) + + ggplot2::ggplot(coverage, aes(x = width_represented, y = diff, + ymin = diff_low, ymax = diff_high)) + + geom_ribbon(fill = "black", alpha = 0.33) + + geom_segment(x = 0, y = 0, xend = 1, yend = 0, color = "skyblue1", size = 2) + + geom_line() + + scale_x_continuous(paste0(interval_type, " interval width"), + labels = scales::percent) + + scale_y_continuous("Coverage diff", labels = scales::percent) + + facet_wrap(~variable, scales = "free_y") } diff --git a/R/results.R b/R/results.R index 9fc0686..6a11dbd 100644 --- a/R/results.R +++ b/R/results.R @@ -1,8 +1,11 @@ -#' SBC_results objects. +#' @title Create an `SBC_results` object #' +#' @description +#' This will build and validate an `SBC_results` object from its constituents. 
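# A minimal usage sketch with hypothetical toy data: both the plot_coverage*
# functions above and the `$stats` field of the `SBC_results` object described
# below share the same layout - one row per variable and simulation with
# `sim_id`, `variable`, `rank` and `max_rank` columns. Real stats come from
# compute_SBC(); the values here are made up.
toy_stats <- data.frame(
  sim_id   = rep(1:50, times = 2),
  variable = rep(c("mu", "sigma"), each = 50),
  rank     = sample(0:19, 100, replace = TRUE),
  max_rank = 19
)
plot_coverage(toy_stats)       # observed coverage vs. interval width
plot_coverage_diff(toy_stats)  # difference from the expected coverage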
#' +#' @details #' The `SBC_results` contains the following fields: -#' - `$stats` statistics for all parameters and fits (one row per parameter-fit combination) +#' - `$stats` statistics for all variables and fits (one row per variable-fit combination) #' - `$fits` the raw fits (unless `keep_fits = FALSE`) or `NULL` if the fit failed #' - `$errors` error messages that caused fit failures #' - `$outputs`, `$messages`, `$warnings` the outputs/messages/warnings written by fits @@ -25,8 +28,8 @@ SBC_results <- function(stats, } compute_default_diagnostics <- function(stats) { - dplyr::summarise(dplyr::group_by(stats, dataset_id), - n_params = dplyr::n(), + dplyr::summarise(dplyr::group_by(stats, sim_id), + n_vars = dplyr::n(), max_rhat = max(c(-Inf, rhat)), min_ess_bulk = min(c(Inf, ess_bulk)), min_ess_tail = min(c(Inf, ess_tail)), @@ -41,6 +44,15 @@ validate_SBC_results <- function(x) { stop("SBC_results object has to have a 'stats' field of type data.frame") } + # Ensure backwards compatibility + if("dataset_id" %in% names(x$stats)) { + x$stats <- dplyr::rename(x$stats, sim_id = dataset_id) + } + + if("parameter" %in% names(x$stats)) { + x$stats <- dplyr::rename(x$stats, variable = parameter) + } + if(!is.list(x$fits)) { stop("SBC_results object has to have a 'fits' field of type list") } @@ -53,19 +65,24 @@ validate_SBC_results <- function(x) { stop("If the SBC_results object has a 'default_diagnostics' field, it has to inherit from data.frame") } + # Ensure backwards compatibility + if("parameter" %in% names(x$default_diagnostics)) { + x$stats <- dplyr::rename(x$stats, variable = parameter) + } + if(!is.list(x$errors)) { stop("SBC_results object has to have an 'errors' field of type list") } if(nrow(x$stats) > 0) { - if(!is.numeric(x$stats$dataset_id)) { - stop("The dataset_id column of stats needs to be a number.") + if(!is.numeric(x$stats$sim_id)) { + stop("The sim_id column of stats needs to be a number.") } - if(min(x$stats$dataset_id) < 1 || max(x$stats$dataset_id) > length(x$fits)) { - stop("stats$dataset_id values must be between 1 and number of fits") + if(min(x$stats$sim_id) < 1 || max(x$stats$sim_id) > length(x$fits)) { + stop("stats$sim_id values must be between 1 and number of fits") } } @@ -88,24 +105,36 @@ validate_SBC_results <- function(x) { } if(!is.null(x$backend_diagnostics) && nrow(x$backend_diagnostics) > 0) { - if(!is.numeric(x$backend_diagnostics$dataset_id)) { - stop("The dataset_id column of 'backend_diagnostics' needs to be a number.") + + # Ensure backwards compatibility + if("dataset_id" %in% names(x$backend_diagnostics)) { + x$backend_diagnostics <- dplyr::rename(x$backend_diagnostics, sim_id = dataset_id) } - if(min(x$backend_diagnostics$dataset_id) < 1 || max(x$backend_diagnostics$dataset_id > length(x$fits))) { - stop("backend_diagnostics$dataset_id values must be between 1 and number of fits") + if(!is.numeric(x$backend_diagnostics$sim_id)) { + stop("The sim_id column of 'backend_diagnostics' needs to be a number.") + } + + + if(min(x$backend_diagnostics$sim_id) < 1 || max(x$backend_diagnostics$sim_id > length(x$fits))) { + stop("backend_diagnostics$sim_id values must be between 1 and number of fits") } } if(nrow(x$default_diagnostics) > 0) { - if(!is.numeric(x$default_diagnostics$dataset_id)) { - stop("The dataset_id column of 'default_diagnostics' needs to be a number.") + # Ensure backwards compatibility + if("dataset_id" %in% names(x$default_diagnostics)) { + x$default_diagnostics <- dplyr::rename(x$default_diagnostics, sim_id = dataset_id) + } + + 
if(!is.numeric(x$default_diagnostics$sim_id)) { + stop("The sim_id column of 'default_diagnostics' needs to be a number.") } - if(min(x$default_diagnostics$dataset_id) < 1 || max(x$default_diagnostics$dataset_id > length(x$fits))) { - stop("default_diagnostics$dataset_id values must be between 1 and number of fits") + if(min(x$default_diagnostics$sim_id) < 1 || max(x$default_diagnostics$sim_id > length(x$fits))) { + stop("default_diagnostics$sim_id values must be between 1 and number of fits") } } @@ -114,13 +143,13 @@ validate_SBC_results <- function(x) { stop("Needs equal no. of fits and errors") } - #TODO check identical par names + #TODO check identical var names x } #' Combine multiple SBC results together. #' -#' Primarily useful for iteratively adding more datasets to your SBC check. +#' Primarily useful for iteratively adding more simulations to your SBC check. #' #' An example usage can be found in the `small_model_workflow` vignette. #' @param ... objects of type `SBC_results` to be combined. @@ -140,30 +169,30 @@ bind_results <- function(...) { warnings_list <- purrr::map(args, function(x) x$warnings) outputs_list <- purrr::map(args, function(x) x$outputs) - # Ensure unique dataset_ids - max_ids <- as.numeric(purrr::map(stats_list, function(x) max(x$dataset_id))) + # Ensure unique sim_ids + max_ids <- as.numeric(purrr::map(stats_list, function(x) max(x$sim_id))) shifts <- c(0, max_ids[1:(length(max_ids)) - 1]) # Shift of IDs per dataset - shift_dataset_id <- function(x, shift) { + shift_sim_id <- function(x, shift) { if(is.null(x)) { x } else { - dplyr::mutate(x, dataset_id = dataset_id + shift) + dplyr::mutate(x, sim_id = sim_id + shift) } } - # Combines multiple data frame objects and then sorts by dataset_id + # Combines multiple data frame objects and then sorts by sim_id bind_and_rearrange_df <- function(df_list) { dplyr::arrange( do.call(rbind, df_list), - dataset_id + sim_id ) } # Apply the shifts of IDs to individual stats/diagnostics data frames - stats_list <- purrr::map2(stats_list, shifts, shift_dataset_id) - backend_diagnostics_list <- purrr::map2(backend_diagnostics_list, shifts, shift_dataset_id) - default_diagnostics_list <- purrr::map2(default_diagnostics_list, shifts, shift_dataset_id) + stats_list <- purrr::map2(stats_list, shifts, shift_sim_id) + backend_diagnostics_list <- purrr::map2(backend_diagnostics_list, shifts, shift_sim_id) + default_diagnostics_list <- purrr::map2(default_diagnostics_list, shifts, shift_sim_id) # Combine all the elements into a bigger object SBC_results(stats = bind_and_rearrange_df(stats_list), @@ -202,9 +231,9 @@ length.SBC_results <- function(x) { if(is.null(df)) { NULL } - filtered <- dplyr::filter(df, dataset_id %in% indices_to_keep) - remapped <- dplyr::mutate(filtered, dataset_id = index_map[as.character(dataset_id)]) - dplyr::arrange(remapped, dataset_id) + filtered <- dplyr::filter(df, sim_id %in% indices_to_keep) + remapped <- dplyr::mutate(filtered, sim_id = index_map[as.character(sim_id)]) + dplyr::arrange(remapped, sim_id) } SBC_results(stats = subset_run_df(x$stats), @@ -218,6 +247,24 @@ length.SBC_results <- function(x) { } +#' @title Compute SBC results +#' @description Delegates directly to `compute_SBC()`. +#' +#' @name compute_results-deprecated +#' @seealso \code{\link{SBC-deprecated}} +#' @keywords internal +NULL + +#' @rdname SBC-deprecated +#' @section \code{compute_results}: +#' Instead of \code{compute_results}, use \code{\link{compute_SBC}}. +#' +#' @export +compute_results <- function(...) 
{ + warning("compute_results() is deprecated, use compute_SBC instead.") + compute_SBC(...) +} + #' Fit datasets and evaluate diagnostics and SBC metrics. #' #' Performs the main SBC routine given datasets and a backend. @@ -232,7 +279,7 @@ length.SBC_results <- function(x) { #' # Thinning #' #' When using backends based on MCMC, there are two possible moments when -#' samples may need to be thinned. They can be thinned directly within the backend +#' draws may need to be thinned. They can be thinned directly within the backend #' and they may be thinned only to compute the ranks for SBC as specified by the #' `thin_ranks` argument. The main reason those are separate is that computing the #' ranks requires no or negligible autocorrelation while some autocorrelation @@ -246,12 +293,27 @@ length.SBC_results <- function(x) { #' it might be sensible to thin quite aggressively already in the backend and #' then have some additional thinning via `thin_ranks`. #' -#' Backends that don't require thining should implement [SBC_backend_iid_samples()] +#' Backends that don't require thining should implement [SBC_backend_iid_draws()] #' or [SBC_backend_default_thin_ranks()] to avoid thinning by default. #' +#' # Rank divisors +#' +#' Some of the visualizations and post processing steps +#' we use in the SBC package (e.g. [plot_rank_hist()], [empirical_coverage()]) +#' work best if the total number of possible SBC ranks is a "nice" number +#' (lots of divisors). +#' However, the number of ranks is one plus the number of posterior samples +#' after thinning - therefore as long as the number of samples is a "nice" +#' number, the number of ranks usually will not be. To remedy this, you can +#' specify `ensure_num_ranks_divisor` - the method will drop at most +#' `ensure_num_ranks_divisor - 1` samples to make the number of ranks divisible +#' by `ensure_num_ranks_divisor`. The default 2 prevents the most annoying +#' pathologies while discarding at most a single sample. +#' #' @param datasets an object of class `SBC_datasets` #' @param backend the model + sampling algorithm. The built-in backends can be constructed -#' using [SBC_backend_cmdstan_sample()], [SBC_backend_cmdstan_variational()], [SBC_backend_rstan_sample()] and [SBC_backend_brms()]. +#' using [SBC_backend_cmdstan_sample()], [SBC_backend_cmdstan_variational()], +#' [SBC_backend_rstan_sample()], [SBC_backend_rstan_optimizing()] and [SBC_backend_brms()]. #' (more to come: issue 31, 38, 39). The backend is an S3 class supporting at least the [SBC_fit()], #' [SBC_fit_to_draws_matrix()] methods. #' @param cores_per_fit how many cores should the backend be allowed to use for a single fit? @@ -259,14 +321,16 @@ length.SBC_results <- function(x) { #' than you have cores. See [default_cores_per_fit()]. #' @param keep_fits boolean, when `FALSE` full fits are discarded from memory - #' reduces memory consumption and increases speed (when processing in parallel), but -#' prevents you from inspecting the fits and using [recompute_statistics()]. +#' prevents you from inspecting the fits and using [recompute_SBC_statistics()]. #' We recommend to set to `TRUE` in early phases of workflow, when you run just a few fits. #' Once the model is stable and you want to run a lot of iterations, we recommend setting #' to `FALSE` (even for quite a simple model, 1000 fits can easily exhaust 32GB of RAM). 
-#' @param thin_ranks how much thinning should be applied to posterior samples before computing +#' @param thin_ranks how much thinning should be applied to posterior draws before computing #' ranks for SBC. Should be large enough to avoid any noticeable autocorrelation of the -#' thinned samples. -#' @param chunk_size How many fits of `datasets` shall be processed in one batch +#' thinned draws See details below. +#' @param ensure_num_ranks_divisor Potentially drop some posterior samples to +#' ensure that this number divides the total number of SBC ranks (see Details). +#' @param chunk_size How many simulations within the `datasets` shall be processed in one batch #' by the same worker. Relevant only when using parallel processing. #' The larger the value, the smaller overhead there will be for parallel processing, but #' the work may be distributed less equally across workers. We recommend setting this high @@ -287,10 +351,11 @@ length.SBC_results <- function(x) { #' objects available on all workers. #' @return An object of class [SBC_results()]. #' @export -compute_results <- function(datasets, backend, +compute_SBC <- function(datasets, backend, cores_per_fit = default_cores_per_fit(length(datasets)), keep_fits = TRUE, thin_ranks = SBC_backend_default_thin_ranks(backend), + ensure_num_ranks_divisor = 2, chunk_size = default_chunk_size(length(datasets)), gen_quants = NULL, cache_mode = "none", @@ -317,6 +382,11 @@ compute_results <- function(datasets, backend, backend_hash <- SBC_backend_hash_for_cache(backend) data_hash <- rlang::hash(datasets) + # Ensure backwards compatibility of cache + datasets_old <- datasets + names(datasets_old)[names(datasets) == "variables"] <- "parameters" + data_hash_old <- rlang::hash(datasets_old) + if(file.exists(cache_location)) { results_from_cache <- readRDS(cache_location) if(!is.list(results_from_cache) || @@ -326,23 +396,31 @@ compute_results <- function(datasets, backend, warning("Cache file exists but is in invalid format. Will recompute.") } else if(results_from_cache$backend_hash != backend_hash) { message("Cache file exists but the backend hash differs. Will recompute.") - } else if(results_from_cache$data_hash != data_hash) { + } else if(results_from_cache$data_hash != data_hash && results_from_cache$data_hash != data_hash_old) { message("Cache file exists but the datasets hash differs. Will recompute.") } else { + if(is.null(results_from_cache$ensure_num_ranks_divisor)) { + results_from_cache$ensure_num_ranks_divisor <- 1 + } + result <- tryCatch(validate_SBC_results(results_from_cache$result), error = function(e) { NULL }) + if(is.null(result)) { warning("Cache file contains invalid SBC_results object. Will recompute.") } else if(results_from_cache$thin_ranks != thin_ranks || - !identical(results_from_cache$gen_quants, gen_quants)) { + !identical(results_from_cache$gen_quants, gen_quants) || + results_from_cache$ensure_num_ranks_divisor != ensure_num_ranks_divisor) { if(!results_from_cache$keep_fits) { - message("Cache file exists, but was computed with different thin_ranks/gen_quants and keep_fits == FALSE. Will recompute.") + message("Cache file exists, but was computed with different thin_ranks/gen_quants/ensure_num_ranks_divisor and keep_fits == FALSE. 
Will recompute.") } else { message(paste0("Results loaded from cache file '", cache_basename, - "' but it was computed with different thin_ranks/gen_quants.\n", - "Calling recompute_statistics.")) - return(recompute_statistics(old_results = result, datasets = datasets, - thin_ranks = thin_ranks, gen_quants = gen_quants, + "' but it was computed with different thin_ranks/gen_quants/ensure_num_ranks_divisor.\n", + "Calling recompute_SBC_statistics.")) + return(recompute_SBC_statistics(old_results = result, datasets = datasets, + thin_ranks = thin_ranks, + ensure_num_ranks_divisor = ensure_num_ranks_divisor, + gen_quants = gen_quants, backend = backend)) } } else { @@ -364,10 +442,10 @@ compute_results <- function(datasets, backend, # Create combined data for computation - params_and_generated_list <- list() + vars_and_generated_list <- list() for(i in 1:length(datasets)) { - params_and_generated_list[[i]] <- list( - parameters = posterior::subset_draws(datasets$parameters, + vars_and_generated_list[[i]] <- list( + variables = posterior::subset_draws(datasets$variables, draw = i), generated = datasets$generated[[i]] ) @@ -375,13 +453,27 @@ compute_results <- function(datasets, backend, if(is.null(gen_quants)) { future.globals <- globals } else { - future.globals <- c(globals, attr(gen_quants, "globals")) + gq_globals <- attr(gen_quants, "globals") + if(length(globals) > 0 && length(gq_globals > 0)) { + if(is.list(gq_globals) && !is.list(globals)) { + stop(SBC_error("Not implemented: Currently, when globals in generated quantites are a list, globals argument has to be also a list (not a character vector).")) + } else if(!is.list(gq_globals) && is.list(globals)) { + stop(SBC_error("Not implemented: Currently, when globals is a list, globals in generated quantites have to be also a list (not a character vector).")) + } + future.globals <- c(globals, gq_globals) + } + if(length(gq_globals) > 0) { + future.globals <- gq_globals + } else { + future.globals <- globals + } } results_raw <- future.apply::future_lapply( - params_and_generated_list, SBC:::compute_results_single, + vars_and_generated_list, SBC:::compute_SBC_single, backend = backend, cores = cores_per_fit, keep_fit = keep_fits, thin_ranks = thin_ranks, + ensure_num_ranks_divisor = ensure_num_ranks_divisor, gen_quants = gen_quants, future.seed = TRUE, future.globals = future.globals, @@ -403,25 +495,25 @@ compute_results <- function(datasets, backend, } if(is.null(results_raw[[i]]$error)) { stats_list[[i]] <- results_raw[[i]]$stats - stats_list[[i]]$dataset_id <- i - stats_list[[i]] <- dplyr::select(stats_list[[i]], dataset_id, tidyselect::everything()) + stats_list[[i]]$sim_id <- i + stats_list[[i]] <- dplyr::select(stats_list[[i]], sim_id, tidyselect::everything()) backend_diagnostics_list[[i]] <- results_raw[[i]]$backend_diagnostics if(!is.null(results_raw[[i]]$backend_diagnostics)){ - backend_diagnostics_list[[i]]$dataset_id <- i - backend_diagnostics_list[[i]] <- dplyr::select(backend_diagnostics_list[[i]], dataset_id, tidyselect::everything()) + backend_diagnostics_list[[i]]$sim_id <- i + backend_diagnostics_list[[i]] <- dplyr::select(backend_diagnostics_list[[i]], sim_id, tidyselect::everything()) } } else { if(n_errors < max_errors_to_show) { if(is.null(results_raw[[i]]$fit)) { - message("Dataset ", i, " resulted in error when fitting.\n") + message("Simulation ", i, " resulted in error when fitting.\n") message(results_raw[[i]]$error, "\n") if(!is.null(results_raw[[i]]$warnings)) { - message(" --- Warnings for fit ", i, " ----") 
+ message(" --- Warnings for sim ", i, " ----") message(paste0(results_raw[[i]]$warnings, collapse = "\n")) } if(!is.null(results_raw[[i]]$messages)) { - message(" --- Messages for fit ", i, " ----") + message(" --- Messages for sim ", i, " ----") message(paste0(results_raw[[i]]$messages, collapse = "\n")) } if(is.null(results_raw[[i]]$output)) { @@ -430,16 +522,16 @@ compute_results <- function(datasets, backend, message(" ---- Model output ----") cat(paste0(results_raw[[i]]$output, collapse = "\n")) } - message("\n ---- End of output for dataset ", i, " -----") + message("\n ---- End of output for simulation ", i, " -----") } else { - message("Dataset ", i, " resulted in error when post-processing the fit.\n", - "Calling `recompute_statistics` after you've found and fixed the problem could ", + message("Simulation ", i, " resulted in error when post-processing the fit.\n", + "Calling `recompute_SBC_statistics` after you've found and fixed the problem could ", "let you move further without refitting") message(results_raw[[i]]$error, "\n") } } else if(n_errors == max_errors_to_show) { - message("Too many datasets produced errors. Further error messages not shown.\n") + message("Too many simulations produced errors. Further error messages not shown.\n") } n_errors <- n_errors + 1 errors[[i]] <- results_raw[[i]]$error @@ -456,26 +548,28 @@ compute_results <- function(datasets, backend, } if(n_errors == length(datasets)) { - warning("All datasets produced error when fitting") + warning("All simulations produced error when fitting") } else if(n_errors > 0) { - warning("Total of ", n_errors, " datasets produced errors.") + warning("Total of ", n_errors, " simulations produced errors.") } stats <- do.call(rbind, stats_list) backend_diagnostics <- do.call(rbind, backend_diagnostics_list) if(!is.null(stats)) { - check_stats(stats, datasets, thin_ranks) + check_stats(stats, datasets, thin_ranks = thin_ranks, + ensure_num_ranks_divisor = ensure_num_ranks_divisor, + iid_draws = SBC_backend_iid_draws(backend)) } else { # Return dummy stats that let the rest of the code work. - stats <- data.frame(dataset_id = integer(0), rhat = numeric(0), ess_bulk = numeric(0), + stats <- data.frame(sim_id = integer(0), rhat = numeric(0), ess_bulk = numeric(0), ess_tail = numeric(0), rank = integer(0), simulated_value = numeric(0), max_rank = integer(0)) } default_diagnostics <- tryCatch( { compute_default_diagnostics(stats) }, - error = function(e) { warning("Error when computing param diagnostics. ", e); NULL }) + error = function(e) { warning("Error when computing default per-variable diagnostics. 
", e); NULL }) res <- SBC_results(stats = stats, fits = fits, outputs = outputs, @@ -488,6 +582,7 @@ compute_results <- function(datasets, backend, if(cache_mode == "results") { results_for_cache <- list(result = res, backend_hash = backend_hash, data_hash = data_hash, thin_ranks = thin_ranks, + ensure_num_ranks_divisor = ensure_num_ranks_divisor, gen_quants = gen_quants, keep_fits = keep_fits) tryCatch(saveRDS(results_for_cache, file = cache_location), error = function(e) { warning("Error when saving cache file: ", e) }) @@ -542,7 +637,10 @@ capture_all_outputs <- function(expr) { logs <<- new_l } output <- capture.output({ - res <- withCallingHandlers( + previous_try_outfile <- getOption("try.outFile") + options(try.outFile = stdout()) + res <- tryCatch( + withCallingHandlers( expr, warning=function(w) { add_log("warning", conditionMessage(w)) @@ -550,17 +648,33 @@ capture_all_outputs <- function(expr) { }, message = function(m) { add_log("message", conditionMessage(m)) invokeRestart("muffleMessage") + }), + finally = { + options(try.outFile = previous_try_outfile) }) }, type = "output") list(result = res, messages = do.call(c, logs$message), warnings = do.call(c, logs$warning), output = output) } +# Re-emit what was captured with capture_all_outputs +reemit_captured <- function(captured) { + cat(captured$output, sep = "\n") + for(m in 1:length(captured$messages)) { + message(captured$messages[m], appendLF = FALSE) + } + for(w in 1:length(captured$warnings)) { + warning(captured$warnings[w]) + } +} -compute_results_single <- function(params_and_generated, backend, cores, - keep_fit, thin_ranks, gen_quants) { +# See `compute_SBC` for docs for the function arguments +compute_SBC_single <- function(vars_and_generated, backend, cores, + keep_fit, thin_ranks, + ensure_num_ranks_divisor, + gen_quants) { - parameters <- params_and_generated$parameters - generated <- params_and_generated$generated + variables <- vars_and_generated$variables + generated <- vars_and_generated$generated # Note: explicitly referencing functions from the SBC package is needed # here as the function might be run in a separate R session that does not @@ -581,8 +695,9 @@ compute_results_single <- function(params_and_generated, backend, cores, if(is.null(res$error)) { error_stats <- SBC:::capture_all_outputs({ tryCatch( { - res$stats <- SBC::statistics_from_single_fit( - res$fit, parameters = parameters, thin_ranks = thin_ranks, + res$stats <- SBC::SBC_statistics_from_single_fit( + res$fit, variables = variables, thin_ranks = thin_ranks, + ensure_num_ranks_divisor = ensure_num_ranks_divisor, generated = generated, gen_quants = gen_quants, backend = backend) @@ -620,13 +735,16 @@ compute_results_single <- function(params_and_generated, backend, cores, #' Recompute SBC statistics given a single fit. #' #' Potentially useful for doing some advanced stuff, but should not -#' be used in regular workflow. Use [recompute_statistics()] to update +#' be used in regular workflow. Use [recompute_SBC_statistics()] to update #' an `[SBC_results]` objects with different `thin_ranks` or other settings. 
#' +#' @inheritParams compute_SBC #' @export -#' @seealso [recompute_statistics()] -statistics_from_single_fit <- function(fit, parameters, generated, - thin_ranks, gen_quants, +#' @seealso [recompute_SBC_statistics()] +SBC_statistics_from_single_fit <- function(fit, variables, generated, + thin_ranks, + ensure_num_ranks_divisor, + gen_quants, backend) { fit_matrix <- SBC_fit_to_draws_matrix(fit) @@ -636,37 +754,52 @@ statistics_from_single_fit <- function(fit, parameters, generated, gq_fit <- compute_gen_quants(fit_matrix, generated, gen_quants) fit_matrix <- posterior::bind_draws(fit_matrix, gq_fit, along = "variable") - gq_parameter <- compute_gen_quants(parameters, generated, gen_quants) - parameters <- posterior::bind_draws(parameters, gq_parameter, along = "variable") + gq_variable <- compute_gen_quants(variables, generated, gen_quants) + variables <- posterior::bind_draws(variables, gq_variable, along = "variable") } - shared_pars <- intersect(posterior::variables(parameters), + shared_vars <- intersect(posterior::variables(variables), posterior::variables(fit_matrix)) - # Make sure the order of parameters matches - parameters <- posterior::subset_draws(parameters, variable = shared_pars) + # Make sure the order of variables matches + variables <- posterior::subset_draws(variables, variable = shared_vars) - fit_matrix <- posterior::subset_draws(fit_matrix, variable = shared_pars) + fit_matrix <- posterior::subset_draws(fit_matrix, variable = shared_vars) fit_thinned <- posterior::thin_draws(fit_matrix, thin_ranks) - stats <- posterior::summarise_draws(fit_matrix) - if(SBC_backend_iid_samples(backend)) { - ## iid samples have the bestest diagnostics by construction + if(SBC_backend_iid_draws(backend)) { + stats <- posterior::summarise_draws(fit_matrix, posterior::default_summary_measures()) + ## iid draws have the bestest diagnostics by construction stats$rhat <- 1 stats$ess_bulk <- posterior::ndraws(fit_matrix) stats$ess_tail <- posterior::ndraws(fit_matrix) + } else { + stats <- posterior::summarise_draws(fit_matrix) } - stats <- dplyr::rename(stats, parameter = variable) - stats$simulated_value <- as.numeric(parameters) + stats$simulated_value <- as.numeric(variables) - ranks <- calculate_ranks_draws_matrix(parameters, fit_thinned) - if(!identical(stats$parameter, names(ranks))) { + # Ensure number of ranks divisible by ensure_num_ranks_divisor + # Note that the number of ranks is the number of samples + 1 + ndraws_to_discard <- (posterior::ndraws(fit_thinned) + 1) %% ensure_num_ranks_divisor + if(ndraws_to_discard > 0) { + ndraws_to_keep <- posterior::ndraws(fit_thinned) - ndraws_to_discard + if(ndraws_to_keep > 0) { + fit_thinned <- posterior::subset_draws( + posterior::merge_chains(fit_thinned), draw = 1:ndraws_to_keep) + } else { + warning("Enforcing ensure_num_ranks_divisor = ", ensure_num_ranks_divisor, + "would lead to no samples being left and was ignored.") + } + } + + ranks <- calculate_ranks_draws_matrix(variables, fit_thinned) + if(!identical(stats$variable, names(ranks))) { stop("A naming conflict") } stats$rank <- ranks @@ -674,36 +807,44 @@ statistics_from_single_fit <- function(fit, parameters, generated, stats$z_score <- (stats$simulated_value - stats$mean) / stats$sd stats <- dplyr::select( - stats, parameter, simulated_value, rank, z_score, tidyselect::everything()) + stats, variable, simulated_value, rank, z_score, tidyselect::everything()) stats } -# check that the computed stats data frame hs problems -check_stats <- function(stats, datasets, thin_ranks) { +# 
check that the computed stats data frame has problems +check_stats <- function(stats, datasets, thin_ranks, + ensure_num_ranks_divisor, iid_draws) { unique_max_ranks <- unique(stats$max_rank) if(length(unique_max_ranks) != 1) { warning("Differening max_rank across fits") } - if(min(unique_max_ranks) < 50) { + if(min(unique_max_ranks) < 49) { + if(iid_draws) { + message_end = " (the backend produces i.i.d. samples so thin_ranks = 1 is the most sensible)." + } else { + message_end = "." + } warning("Ranks were computed from fewer than 50 samples, the SBC checks will have low ", "precision.\nYou may need to increase the number of samples from the backend and make sure that ", - "the combination of thinning in the backend and `thin_ranks` is sensible.\n", - "Currently thin_ranks = ", thin_ranks, ".") + "the combination of thinning in the backend, `thin_ranks` and `ensure_num_ranks_divisor` is sensible.\n", + "Currently thin_ranks = ", thin_ranks, ", ensure_num_ranks_divisor = ", + ensure_num_ranks_divisor, + message_end) } - all_pars <- dplyr::summarise( - dplyr::group_by(stats, dataset_id), - all_pars = paste0(parameter, collapse = ","), .groups = "drop") - if(length(unique(all_pars$all_pars)) != 1) { - warning("Not all fits share the same parameters") + all_vars <- dplyr::summarise( + dplyr::group_by(stats, sim_id), + all_vars = paste0(variable, collapse = ","), .groups = "drop") + if(length(unique(all_vars$all_vars)) != 1) { + warning("Not all fits share the same variables") } - missing_pars <- setdiff(posterior::variables(datasets$parameters), stats$parameter) - if(length(missing_pars) > 0) { - warning("Some parameters missing in fits: ", paste0(missing_pars, collapse = ", ")) + missing_vars <- setdiff(posterior::variables(datasets$variables), stats$variable) + if(length(missing_vars) > 0) { + warning("Some variables missing in fits: ", paste0(missing_vars, collapse = ", ")) } } @@ -713,7 +854,7 @@ check_stats <- function(stats, datasets, thin_ranks) { #' When the expression contains non-library functions/objects, and parallel processing #' is enabled, those must be #' named in the `.globals` parameter (hopefully we'll be able to detect those -#' automatically in the future). Note that [recompute_statistics()] currently +#' automatically in the future). Note that [recompute_SBC_statistics()] currently #' does not use parallel processing, so `.globals` don't need to be set. #' #' @param ... named expressions representing the quantitites @@ -755,29 +896,51 @@ compute_gen_quants <- function(draws, generated, gen_quants) { do.call(posterior::draws_rvars, rvars) } +#' @title Recompute SBC statistics without refitting models. +#' @description Delegates directly to `recompute_SBC_statistics()`. +#' +#' @name recompute_statistics-deprecated +#' @seealso \code{\link{SBC-deprecated}} +#' @keywords internal +NULL + +#' @rdname SBC-deprecated +#' @section \code{recompute_statistics}: +#' Instead of \code{recompute_statistics}, use \code{\link{recompute_SBC_statistics}}. +#' +#' @export +recompute_statistics <- function(...) { + warning("recompute_statistics() is deprecated, use recompute_SBC_statistics instead.") + recompute_SBC_statistics(...) +} + #' Recompute SBC statistics without refitting models. #' +#' +#' #' Useful for example to recompute SBC ranks with a different choice of `thin_ranks` #' or added generated quantities. #' @return An S3 object of class `SBC_results` with updated `$stats` and `$default_diagnostics` fields. #' @param backend backend used to fit the results. 
Used to pull various defaults #' and other setting influencing the computation of statistics. +#' @inheritParams compute_SBC #' @export -recompute_statistics <- function(old_results, datasets, backend, +recompute_SBC_statistics <- function(old_results, datasets, backend, thin_ranks = SBC_backend_default_thin_ranks(backend), + ensure_num_ranks_divisor = 2, gen_quants = NULL) { validate_SBC_results(old_results) validate_SBC_datasets(datasets) if(length(old_results) != length(datasets)) { - stop("The number of fits in old_results does not match the number of datasets") + stop("The number of fits in old_results does not match the number of simulations") } new_results <- old_results missing_fits <- purrr::map_lgl(old_results$fits, is.null) if(all(missing_fits)) { stop("No raw fits preserved, cannot recompute. ", - "Either all datasets produced errors or the results were computed with keep_fits = FALSE") + "Either all simulations produced errors or the results were computed with keep_fits = FALSE") } else if(any(missing_fits)) { warning("Some raw fits not available. Those fits will be ignored when recomputing statistics") } @@ -785,27 +948,30 @@ recompute_statistics <- function(old_results, datasets, backend, new_stats_list <- list() for(i in 1:length(old_results)) { if(!is.null(old_results$fits[[i]])) { - parameters <- posterior::subset_draws(datasets$parameters, draw = i) - new_stats_list[[i]] <- statistics_from_single_fit(old_results$fits[[i]], - parameters = parameters, + variables <- posterior::subset_draws(datasets$variables, draw = i) + new_stats_list[[i]] <- SBC_statistics_from_single_fit(old_results$fits[[i]], + variables = variables, generated = datasets$generated[[i]], thin_ranks = thin_ranks, + ensure_num_ranks_divisor = ensure_num_ranks_divisor, gen_quants = gen_quants, backend = backend) - new_stats_list[[i]]$dataset_id <- i - new_stats_list[[i]] <- dplyr::select(new_stats_list[[i]], dataset_id, tidyselect::everything()) + new_stats_list[[i]]$sim_id <- i + new_stats_list[[i]] <- dplyr::select(new_stats_list[[i]], sim_id, tidyselect::everything()) } } new_stats <- do.call(rbind, new_stats_list) - check_stats(new_stats, datasets, thin_ranks) + check_stats(new_stats, datasets, thin_ranks = thin_ranks, + ensure_num_ranks_divisor = ensure_num_ranks_divisor, + iid_draws = SBC_backend_iid_draws(backend)) new_results$stats <- new_stats new_results$default_diagnostics <- tryCatch( { compute_default_diagnostics(new_stats) }, - error = function(e) { warning("Error when computing param diagnostics. ", e); NULL }) + error = function(e) { warning("Error when computing default per-variable diagnostics. ", e); NULL }) check_all_SBC_diagnostics(new_results) @@ -823,23 +989,32 @@ rdunif <- function(n, a, b) { ceiling(runif(n, min = a - 1, max= b)) } -#' Calculate ranks given parameter values within a posterior distribution. +#' Calculate ranks given variable values within a posterior distribution. #' -#' When there are ties (e.g. for discrete parameters), the rank is currently drawn stochastically +#' When there are ties (e.g. for discrete variables), the rank is currently drawn stochastically #' among the ties. -#' @param params a vector of values to check +#' @param variables a vector of values to check #' @param dm draws_matrix of the fit (assumed to be already thinned if that was necessary) +#' @param params DEPRECATED. Use `variables` instead. 
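# Illustrative sketch for a single variable (hypothetical helper, mirroring the
# implementation of calculate_ranks_draws_matrix() below): the rank is the
# number of posterior draws below the simulated value, and ties - which occur
# for discrete variables - are broken by a uniform random draw.
rank_single <- function(simulated_value, draws) {
  rank_min <- sum(draws < simulated_value)   # draws strictly below the value
  n_ties   <- sum(draws == simulated_value)  # ties matter for discrete variables
  rank_min + sample.int(n_ties + 1, 1) - 1   # uniform over the tied positions
}
set.seed(1)
rank_single(0.3, runif(99))  # an integer between 0 and 99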
#' @export -calculate_ranks_draws_matrix <- function(params, dm) { +calculate_ranks_draws_matrix <- function(variables, dm, params = NULL) { #TODO validate input + + if(!is.null(params)) { + warning("The `params` argument is deprecated use `variables` instead.") + if(is.null(variables)) { + variables <- params + } + } + max_rank <- posterior::ndraws(dm) - less_matrix <- sweep(dm, MARGIN = 2, STATS = params, FUN = "<") + less_matrix <- sweep(dm, MARGIN = 2, STATS = variables, FUN = "<") rank_min <- colSums(less_matrix) - # When there are ties (e.g. for discrete parameters), the rank is currently drawn stochastically + # When there are ties (e.g. for discrete variables), the rank is currently drawn stochastically # among the ties - equal_matrix <- sweep(dm, MARGIN = 2, STATS = params, FUN = "==") + equal_matrix <- sweep(dm, MARGIN = 2, STATS = variables, FUN = "==") rank_range <- colSums(equal_matrix) ranks <- rank_min + rdunif(posterior::nvariables(dm), a = 0, b = rank_range) @@ -990,7 +1165,7 @@ get_diagnostic_messages.SBC_results_summary <- function(x) { if(x$n_low_ess_to_rank > 0) { msg <- paste0(x$n_low_ess_to_rank, " (", round(100 * x$n_low_ess_to_rank / x$n_fits), "%) fits had tail ESS undefined or less than ", "half of the maximum rank, potentially skewing \nthe rank statistics. The lowest tail ESS was ", round(x$min_min_ess_tail), - ".\n If the fits look good otherwise, increasing `thin_ranks` (via recompute_statistics) \nor number of posterior samples (by refitting) might help.") + ".\n If the fits look good otherwise, increasing `thin_ranks` (via recompute_SBC_statistics) \nor number of posterior draws (by refitting) might help.") message_list[[i]] <- data.frame(ok = FALSE, message = msg) } else { message_list[[i]] <- data.frame(ok = TRUE, message = "All fits had tail ESS > half of the maximum rank.") diff --git a/R/self-calib.R b/R/self-calib.R new file mode 100644 index 0000000..febea1c --- /dev/null +++ b/R/self-calib.R @@ -0,0 +1,525 @@ + + +##' Auto calibrate the initial prior samples using SBC iterations, with an adaptive update strategy +##' +##' @param generator function that generates datasets given each value in `param` +##' @param backend backend object to use for running SBC +##' @param updator hyperparameter update type +##' @param target_param list of strings indicating target parameter names +##' @param init_mu initial lambda_mu value to use +##' @param init_sigma initial lambda_sigma value to use +##' @param nsims number of datasets i.e. prior draws +##' @param niter number of calibration iterations to run +##' @param gamma convergence speed e.g. 
step size +##' @param tol tolerence for determining termination +##' @param fixed_args *named list* containing additional arguments to pass to generator, *after mu and sigma* +##' @export +self_calib_adaptive <- function(generator, backend, updator, target_params, init_lambdas, nsims, niter, gamma, tol, fixed_args){ + dist_types <- fixed_args$dist_types + + ############3 + gamma_estimator <- function(x){ + # mle estimate of shape, scale parameter for gamma distribution + N <- length(x) + sum_x <- sum(x) + sum_log_x <- sum(log(x)) + sum_x_mul_log_x <- sum(x * log(x)) + + k_hat <- (N * sum_x) / (N * sum_x_mul_log_x - sum_log_x * sum_x) + theta_hat <- 1 / N ^ 2 * (N * sum_x_mul_log_x - sum_log_x * sum_x) + + # bias correction + theta_hat <- N / (N - 1) * theta_hat + k_hat <- k_hat - 1 / N * (3 * k_hat - 2/3 * (k_hat / (1 + k_hat)) - 4/5 * (k_hat / (1 + k_hat) ^ 2)) + + return(list(alpha=k_hat, beta=1 / theta_hat)) + } + + lognormal_estimator <- function(x){ + n <- length(x) + mu_hat <- sum(log(x)) / n + sigma_hat <- sum((log(x) - mu_hat) ^ 2) / (n - 1) + return(list(mu=mu_hat, sigma=sigma_hat)) + } + + calculate_dap <- function(current_lambdas){ + nsims <- fixed_args$nsims + datasets <- do.call(generator, list(current_lambdas, fixed_args = fixed_args)) + sbc_result <- SBC::compute_results(datasets, backend, thin_ranks = 1) + draws_etas <- list() + return_lambdas <- list() + for(fit in sbc_result$fits){ + samples <- SBC_fit_to_draws_matrix(fit) + for(target_param in target_params){ + + draws_etas[[target_param]] <- c(draws_etas[[target_param]], posterior::extract_variable(samples, target_param)) + } + } + for(target_param in target_params){ + if(fixed_args$dist_type[[target_param]] == "normal"){ + mu <- mean(draws_etas[[target_param]]) + sigma <- sd(draws_etas[[target_param]]) + return_lambdas[[target_param]] <- list(mu=mu, var=sigma^2) + } + else if(fixed_args$dist_type[[target_param]] == "gamma"){ + gamma_params <- tryCatch( + { + gamma_est = MASS::fitdistr(draws_etas[[target_param]], "gamma")$estimate#, start=list(shape=current_lambdas[[target_param]]$alpha, rate=current_lambdas[[target_param]]$beta))$estimate + alpha = as.numeric(gamma_est["shape"]) + beta = as.numeric(gamma_est["rate"]) + return(list(alpha=alpha, beta=beta)) + }, + error=function(err){ + message(err) + message(sprintf("\ngamma mle estimation for parameter %s failed. Falling back to closed form approximation", target_param)) + return(gamma_estimator(draws_etas[[target_param]])) + } + ) + + return_lambdas[[target_param]] <- gamma_params + } + else if(fixed_args$dist_type[[target_param]] == "lognormal"){ + lognormal_params <- tryCatch( + { + gamma_est = MASS::fitdistr(draws_etas[[target_param]], "lognormal")$estimate + mean = as.numeric(gamma_est["meanlog"]) + sd = as.numeric(gamma_est["sdlog"]) + return(list(mu=mean, sigma=sd)) + }, + error=function(err){ + message(err) + message(sprintf("\nlognormal mle estimation for parameter %s failed. 
Falling back to closed form approximation", target_param)) + return(lognormal_estimator(draws_etas[[target_param]])) + } + ) + + return_lambdas[[target_param]] <- lognormal_params + } + } + return(list(return_lambdas = return_lambdas, draws_etas = draws_etas)) + } + + ############### + # define update strategies + + normal_str_update <- function(draws_dap_lambdas, lambda, gamma){ + normal_str <- function(Tx, x, gamma){ + #b_t <- (Tx + x) / 2 + #Tx + (1/(b_t - Tx) ^2) * (x - Tx)^3 + (Tx + x)/2 + } + + logalpha_new <- log(normal_str(exp(dap$logalpha), exp(lambda$logalpha), gamma)) + logbeta_new <- log(normal_str(exp(dap$logbeta), exp(lambda$logbeta), gamma)) + + list(logalpha = logalpha_new, logbeta = logbeta_new) + } + + mc_update <- function(draws_dap_lambdas, lambdas){ + draws_dap_lambdas + } + + ############### + lambda_loss <- function(dap_lambdas, new_lambdas) { + #return((dap$mu - lambda$mu)^2 + ((dap$sigma)^2 - exp(lambda$logsigma)^2)^2) + sum((unlist(dap_lambdas) - unlist(new_lambdas))^2) + } + + eta_loss <- function(dap_eta, new_lambdas) { + if("mu" %in% names(new_lambdas)){ # normal + eta <- rnorm(length(dap_eta), mean=new_lambdas$mu, sd = sqrt(new_lambdas$var)) + } + else if("alpha" %in% names(new_lambdas)){ # gamma + eta <- rgamma(length(dap_eta), shape=new_lambdas$alpha, rate = new_lambdas$beta) + } + return(cjs_dist(eta, dap_eta)) + } + + normal_kl_divergence <- function(dap, lambda){ + v_1 <- dap$sigma^2 + v_2 <- lambda$sigma^2 + (dap$mu - lambda$mu)^ 2/(2 * v_2) + 0.5 * ((v_1 / v_2) - log(v_1 / v_2) - 1) + } + + # end function declarations + lambda_current <- init_lambdas + t_df <- list() + for (iter_num in 1:niter) { + stop <- TRUE + t_df$iter <- c(t_df$iter, iter_num) + + dap_result <- calculate_dap(lambda_current) + + dap_tx_plot_list <- list() + dap_tx_plot_index = 1 + for(target_param in target_params){ + param_lambdas <- lambda_current[[target_param]] + lambda_count <- length(param_lambdas) + for(i in 1:lambda_count){ + lambda_colname <- paste(target_param, names(param_lambdas)[i], sep = "_") + t_df[[lambda_colname]] <- c(t_df[[lambda_colname]], param_lambdas[[names(param_lambdas)[i]]]) + } + if (dist_types[[target_param]] == "normal"){ + prior_dist_samples <- rnorm(length(dap_result$draws_etas[[target_param]]), mean=param_lambdas$mu, sd=sqrt(param_lambdas$var)) + } + else if (dist_types[[target_param]] == "gamma"){ + prior_dist_samples <- rgamma(length(dap_result$draws_etas[[target_param]]), shape=param_lambdas$alpha, r=param_lambdas$beta) + } + else if (dist_types[[target_param]] == "lognormal"){ + prior_dist_samples <- rlnorm(length(dap_result$draws_etas[[target_param]]), meanlog = param_lambdas$mu, sdlog = param_lambdas$sigma) + } + + plot_df <- data.frame(dap=dap_result$draws_etas[[target_param]], prior=prior_dist_samples) + plot <- ggplot2::ggplot(plot_df) + ggplot2::geom_density(aes(x=dap), color="red") + ggplot2::geom_density(aes(x=prior)) + ggplot2::ggtitle(sprintf("%s (red=dap)", target_param)) + if(iter_num == 1 || iter_num %% 10 == 0){ + print(plot) + } + dap_tx_plot_list[[dap_tx_plot_index]] <- plot + dap_tx_plot_index <- dap_tx_plot_index + 1 + } + + if(updator == "normal_str_update"){ + stop("Unfinished implementation") + lambda_new <- normal_str_update(dap_result$return_lambdas, lambda_current, gamma) + } + else if(updator == "mc_update"){ + lambda_new <- mc_update(dap_result$return_lambdas, lambda_current) + } + + message(sprintf("Iteration %d:", iter_num)) + for(target_param in target_params){ + param_lambdas <- lambda_current[[target_param]] + + 
param_lambda_loss <- lambda_loss(dap_result$return_lambdas[[target_param]], param_lambdas) + param_eta_loss <- eta_loss(dap_result$draws_etas[[target_param]], param_lambdas) + + t_df[[paste(target_param, "lambda_loss", sep="_")]] <- c(t_df[[paste(target_param, "lambda_loss", sep="_")]], param_lambda_loss) + t_df[[paste(target_param, "eta_loss", sep="_")]] <- c(t_df[[paste(target_param, "eta_loss", sep="_")]], param_eta_loss) + message(sprintf("parameter %s - lambda loss: %f eta_loss: %f", target_param, param_lambda_loss, param_eta_loss)) + if(all(abs(unlist(param_lambdas) - unlist(dap_result$return_lambdas[[target_param]])) < tol) && iter_num > 1){ + stop <- TRUE && stop + } + else{ + stop <- FALSE + } + } + + if(stop){ + message(sprintf("Terminating self_calib on iteration %d", iter_num)) + break + } + lambda_current <- lambda_new + } + t_df <- as.data.frame(t_df) + return(list(lambda=lambda_current, t_df=t_df)) +} + + + +##' Auto calibrate the initial prior samples using SBC iteration for gaussian approximation +##' +##' @param generator function that generates datasets given each value in `param` +##' @param backend A backend object to use for running SBC +##' @param mixture_means_init_draws_rvars the initial mixture mean draws_rvars +##' @param mixture_bw_init_draws_rvars the initial mixture bandwidth draws_rvars +##' @param nsims_fn function with input: (mixture_means_rvar, mixture_bw_rvar), output: int +##' int is future number of parallel datasets to generate given true and its fitted hyperparameter (mixture_means) +##' @param thin Integer defining thinning parameter +##' @param max_selfcalib_iters the maximum number of iterations to run calibration. if not given will run indefinitely +##' @param save_all_results Boolean if TRUE returns a list of all SBC results, FALSE returns just the result of the last iteration. 
+##' @param transform_types Transformtype for mixture fitting +##' @param fixed_generator_args *named list* containing additional arguments to pass to generator, *after mixture_means_draws_rvars and mixture_bw_draws_rvars* +##' @export +self_calib_gaussian <- function(generator, backend, mixture_means_init_draws_rvars, mixture_bw_init_draws_rvars, nsims_fn, + thin, max_selfcalib_iters, save_all_results, transform_types, fixed_generator_args){ + if(missing(nsims_fn)){ + nsims_fn <- function(...){300} # Set default # of SBC iterations to 30 + message(paste("number of simulations has been unspecified, using default value of", nsims)) + } + target_params <- posterior::variables(mixture_means_init_draws_rvars) # names of named list, only run calibration for the following parameters + ntarget_params <- length(target_params) + + if(missing(max_selfcalib_iters)){ + max_selfcalib_iters <- Inf + } + + if(missing(save_all_results)){ + save_all_results <- FALSE + } + + selfcalib_itercount <- 1 + sbc_result <- NULL + cjs_record <- list() + for(tp in target_params){ + cjs_record[[tp]] <- c() + } + + if(save_all_results){ + sbc_result_env <- new.env() + } + + while(selfcalib_itercount < max_selfcalib_iters){ + if(selfcalib_itercount == 1){ + nsims <- nsims_fn(1) + mixture_means_draws_rvars <- mixture_means_init_draws_rvars + mixture_bw_draws_rvars <- mixture_bw_init_draws_rvars + }else{ + nsims <- nsims_fn(mixture_means_draws_rvars, mixture_means_next_draws_rvars) + mixture_means_draws_rvars <- mixture_means_next_draws_rvars + mixture_bw_draws_rvars <- mixture_bw_next_draws_rvars + } + message(paste("Running self-calib iteration", selfcalib_itercount, "with nsims =", nsims)) + message("Calling generator..") + datasets <- do.call(generator, c(list(mixture_means_draws_rvars, mixture_bw_draws_rvars), fixed_generator_args)) + message("generator returned value") + sbc_result <- SBC::compute_results(datasets, backend, thin_ranks = thin) + if(save_all_results){ + sbc_result_env[[paste0("result_", selfcalib_itercount)]] <- sbc_result + } + for(s in 1:nsims){ + returned_errors <- sbc_result$errors + if(!is.null(returned_errors[[s]])){ + message("SBC returned 1 or more errors. 
Terminating and returning the last unsuccessful SBC result...") + return(sbc_result) + } + } + ndraws <- posterior::ndraws(sbc_result$fits[[1]]$draws()) + + mixture_means_next_draws_rvars <- list() + mixture_bw_next_draws_rvars <- list() + for(n_variable in 1:ntarget_params){ + target_param_name <- target_params[[n_variable]] + pooled_draws <- c() + for(s in 1:nsims){ + pooled_draws <- c(pooled_draws, posterior::extract_variable(sbc_result$fits[[s]]$draws(), target_param_name)) + } + #pooled_draws = tf_param_vec(pooled_draws, if (missing(transform_types)) "identity" else transform_types[[target_param_name]]) + pooled_draws = tf_param_vec(pooled_draws, transform_types[[target_param_name]]) + + gmm_fit <- mclust::Mclust(pooled_draws, G = nsims, verbose = FALSE) + prop_est <- gmm_fit$parameters$pro + # vectorize gmmmeans -> sample -> + mixture_means_next_draws_rvars[[target_param_name]] <- update_quantile_approximation(mixture_means_draws_rvars[[target_param_name]], posterior::resample_draws(posterior::rvar(array(rep(as.vector(gmm_fit$parameters$mean), each = nsims), dim = c(nsims, nsims))), weights = prop_est)) + mixture_bw_next_draws_rvars[[target_param_name]] <- update_quantile_approximation(mixture_means_next_draws_rvars[[target_param_name]]) + } + mixture_means_next_draws_rvars <- do.call(draws_rvars, mixture_means_next_draws_rvars) + mixture_bw_next_draws_rvars <- do.call(draws_rvars, mixture_bw_next_draws_rvars) + + stop <- TRUE + for(tp in target_params){ + cjs_record[[tp]] <- c(cjs_record[[tp]], cjs_dist(mixture_means_draws_rvars[[tp]], mixture_means_next_draws_rvars[[tp]])) + if(cjs_record[[tp]][selfcalib_itercount] >= 0.5 * cjs_record[[tp]][1]){ + message(paste("cjs_dist for parameter", tp, ":", cjs_record[[tp]][[selfcalib_itercount]])) + stop <- FALSE + } + } + if(stop){ + message(paste("self_calib terminated on iteration", selfcalib_itercount)) + break + } + selfcalib_itercount <- selfcalib_itercount + 1 + } + return(if(save_all_results) sbc_result_env else sbc_result) +} + +##' Auto calibrate the initial prior samples using SBC iteration and gmm approximation +##' +##' @param generator function that generates datasets given each value in `param` +##' @param backend A backend object to use for running SBC +##' @param mixture_means_init_draws_rvars the initial mixture mean draws_rvars +##' @param mixture_bw_init_draws_rvars the initial mixture bandwidth draws_rvars +##' @param nsims_fn function with input: (mixture_means_rvar, mixture_bw_rvar), output: int +##' int is future number of parallel datasets to generate given true and its fitted hyperparameter (mixture_means) +##' @param thin Integer defining thinning parameter +##' @param max_selfcalib_iters the maximum number of iterations to run calibration. if not given will run indefinitely +##' @param save_all_results Boolean if TRUE returns a list of all SBC results, FALSE returns just the result of the last iteration. 
+##' @param transform_types Transformtype for mixture fitting +##' @param fixed_generator_args *named list* containing additional arguments to pass to generator, *after mixture_means_draws_rvars and mixture_bw_draws_rvars* +##' @export + self_calib_gmm <- function(generator, backend, mixture_means_init_draws_rvars, mixture_bw_init_draws_rvars, nsims_fn, + thin, max_selfcalib_iters, save_all_results, transform_types, fixed_generator_args){ + if(missing(nsims_fn)){ + nsims_fn <- function(...){300} # Set default # of SBC iterations to 30 + message(paste("number of simulations has been unspecified, using default value of", nsims)) + } + target_params <- posterior::variables(mixture_means_init_draws_rvars) # names of named list, only run calibration for the following parameters + ntarget_params <- length(target_params) + + if(missing(max_selfcalib_iters)){ + max_selfcalib_iters <- Inf + } + + if(missing(save_all_results)){ + save_all_results <- FALSE + } + + selfcalib_itercount <- 1 + sbc_result <- NULL + cjs_record <- list() + for(tp in target_params){ + cjs_record[[tp]] <- c() + } + + if(save_all_results){ + sbc_result_env <- new.env() + } + + while(selfcalib_itercount < max_selfcalib_iters){ + if(selfcalib_itercount == 1){ + nsims <- nsims_fn(1) + mixture_means_draws_rvars <- mixture_means_init_draws_rvars + mixture_bw_draws_rvars <- mixture_bw_init_draws_rvars + }else{ + nsims <- nsims_fn(mixture_means_draws_rvars, mixture_means_next_draws_rvars) + mixture_means_draws_rvars <- mixture_means_next_draws_rvars + mixture_bw_draws_rvars <- mixture_bw_next_draws_rvars + } + message(paste("Running self-calib iteration", selfcalib_itercount, "with nsims =", nsims)) + message("Calling generator..") + datasets <- do.call(generator, c(list(mixture_means_draws_rvars, mixture_bw_draws_rvars), fixed_generator_args)) + message("generator returned value") + sbc_result <- SBC::compute_results(datasets, backend, thin_ranks = thin) + if(save_all_results){ + sbc_result_env[[paste0("result_", selfcalib_itercount)]] <- sbc_result + } + for(s in 1:nsims){ + returned_errors <- sbc_result$errors + if(!is.null(returned_errors[[s]])){ + message("SBC returned 1 or more errors. 
Terminating and returning the last unsuccessful SBC result...") + return(sbc_result) + } + } + ndraws <- posterior::ndraws(sbc_result$fits[[1]]$draws()) + + mixture_means_next_draws_rvars <- list() + mixture_bw_next_draws_rvars <- list() + for(n_variable in 1:ntarget_params){ + target_param_name <- target_params[[n_variable]] + pooled_draws <- c() + for(s in 1:nsims){ + pooled_draws <- c(pooled_draws, posterior::extract_variable(sbc_result$fits[[s]]$draws(), target_param_name)) + } + #pooled_draws = tf_param_vec(pooled_draws, if (missing(transform_types)) "identity" else transform_types[[target_param_name]]) + pooled_draws = tf_param_vec(pooled_draws, transform_types[[target_param_name]]) + + gmm_fit <- mclust::Mclust(pooled_draws, G = nsims, verbose = FALSE) + prop_est <- gmm_fit$parameters$pro + # vectorize gmmmeans -> sample -> + mixture_means_next_draws_rvars[[target_param_name]] <- update_quantile_approximation(mixture_means_draws_rvars[[target_param_name]], posterior::resample_draws(posterior::rvar(array(rep(as.vector(gmm_fit$parameters$mean), each = nsims), dim = c(nsims, nsims))), weights = prop_est)) + mixture_bw_next_draws_rvars[[target_param_name]] <- update_bw(mixture_means_next_draws_rvars[[target_param_name]]) + } + mixture_means_next_draws_rvars <- do.call(draws_rvars, mixture_means_next_draws_rvars) + mixture_bw_next_draws_rvars <- do.call(draws_rvars, mixture_bw_next_draws_rvars) + + stop <- TRUE + for(tp in target_params){ + cjs_record[[tp]] <- c(cjs_record[[tp]], cjs_dist(mixture_means_draws_rvars[[tp]], mixture_means_next_draws_rvars[[tp]])) + if(cjs_record[[tp]][selfcalib_itercount] >= 0.5 * cjs_record[[tp]][1]){ + message(paste("cjs_dist for parameter", tp, ":", cjs_record[[tp]][[selfcalib_itercount]])) + stop <- FALSE + } + } + if(stop){ + message(paste("self_calib terminated on iteration", selfcalib_itercount)) + break + } + selfcalib_itercount <- selfcalib_itercount + 1 + } + return(if(save_all_results) sbc_result_env else sbc_result) + } + +# fucntion(mixture_means_rvar, mixture_bw_rvar, mixture_mean_hat_rvar, mixture_bw_hat_rvar) possible +update_means <- function(mixture_means_rvar, mixture_means_hat_rvar){ + return(mixture_means_rvar * mean(mixture_means_rvar/ mixture_means_hat_rvar)) +} +update_bw <- function(mixture_means_next_rvars){ + return (rvar(rep(bw.nrd0(draws_of(mixture_means_next_rvars)), posterior::niterations(mixture_means_next_rvars)))) +} +################ +# quantile approximation + +#' Given a vector of draws, return a vector of length S of phi which best approximates the CDF. 
+#' @param draws a vector of sample draws from a distribution
+#' @param S number of quantile points
+#' @return vector of phi which are quantile function values
+approx_quantile_phi <- function(draws, S) {
+  probs <- unlist(lapply(c(1:S), function(x) {(2 * x - 1) / (2 * S)})) # generate (tau_i + tau_{i+1})/2
+  return(quantile(draws, probs, names = FALSE))
+}
+
+#' Given a vector of phis, which represent the quantiles of equally spaced probabilities on [0, 1] defined as i/S, draw N random samples from the approximated quantile function
+#' @param N number of samples to draw, returned as a vector of length N
+#' @param phis vector of phis to sample from
+#' @return a vector of samples drawn by inverse transform sampling
+sample_quantile_phi <- function(N, phis) {
+  return(sample(phis, N, replace = TRUE))
+}
+
+
+#' Calculate quantile Huber loss for a given tau, tau_{s_index}
+#' @param phi_prior vector of phis for the prior (pre-transformation) distribution
+#' @param phi_post vector of phis for the posterior (post-transformation) distribution
+#' @param s_index The tau index to calculate loss for
+#' @param k interval to calculate loss. The resulting loss will be in range \[-k, k\]
+#' @param S the number of phis. Equal to length(phi_prior) = length(phi_post)
+#' @param n_post_samples the number of samples to draw from the posterior, to approximate the expected Huber loss
+#' @return a vector of length 2, where the first value is the expected Huber loss and the second value is the number of posterior samples less than the quantile value at phi\[s_index\]
+quantile_huber_loss <- function(phi_prior, phi_post, s_index, k, S, n_post_samples) {
+  summed_rho_mean <- 0
+  zprime_delta <- 0
+  for(n in 1:n_post_samples){
+    zprime <- sample(phi_post, 1)
+    u <- zprime - phi_prior[s_index]
+    delta <- sum(u < 0) # indicator 1{u < 0}
+    tau_hat <- s_index / S
+    huber_loss <- if (abs(u) <= k) 0.5 * u^2 else k * (abs(u) - 0.5 * k)
+    rho <- abs(tau_hat - delta) * huber_loss / k # quantile Huber loss |tau - 1{u < 0}| * L_k(u) / k
+
+    summed_rho_mean <- summed_rho_mean + rho
+    zprime_delta <- zprime_delta + (if(zprime < phi_prior[s_index]) 1 else 0)
+  }
+  return(c(summed_rho_mean / n_post_samples, zprime_delta))
+}
+
+#' Update mixture mean through quantile approximation based on the analytic gradient of the quantile loss
+#'
+#' @param hyperparam_rvar a posterior::rvar object of prior (pre-transformation) mixture mean values
+#' @param hyperparam_hat_rvar a posterior::rvar object of posterior (post-transformation) mixture mean values
+#' @param S Number of approximation points for the target quantile function.
+#' @param n_post_samples the number of samples to draw from the posterior, to approximate the expected quantile loss
+#' @param epsilon gradient update coefficient
+#' @return a posterior::rvar object with the same dimension as the input rvars.
+#' @export +update_quantile_approximation <- function(hyperparam_rvar, hyperparam_hat_rvar, S, n_post_samples, epsilon) { + phi <- approx_quantile_phi(hyperparam_rvar, S = S) + phi_post <- approx_quantile_phi(hyperparam_hat_rvar, S = S) + updated_phi <- phi + wass_last <- wasserstein(updated_phi, phi_post) + iters <- 0 + while(iters <= 5 || abs(wasserstein(updated_phi, phi_post) - wass_last) > wass_last * 0.001){ + wass_last <- wasserstein(updated_phi, phi_post) + #plot(phi, unlist(lapply(c(1:S), function(x) {(2 * x - 1) / (2 * S)})), type = "l") + #lines(updated_phi, unlist(lapply(c(1:S), function(x) {(2 * x - 1) / (2 * S)})), col="red") + zprime <- sample_quantile_phi(n_post_samples, updated_phi) + for(s in 1:S) { + # delta_sum <- 0 + # for(m in 1:n_post_samples){ + # zprime_delta <- sum(zprime[m] < updated_phi[s]) + # delta_sum <- delta_sum + ((2 * s - 1) / (2 * S) - zprime_delta / n_post_samples + if (zprime[m] - updated_phi[s] == 0) 1 else 0) + # } + delta_sum <-(2 * s - 1) / (2 * S) - (sum(zprime < updated_phi[s]) + sum(zprime == updated_phi[s])) / n_post_samples + updated_phi[s] <- updated_phi[s] + epsilon * (delta_sum / n_post_samples) + #zprime <- sample_quantile_phi(n_post_samples, updated_phi) + #zprime_delta <- sum(zprime < updated_phi[s]) + #print(paste(zprime_delta / n_post_samples, "/", (2 * s - 1) / (2 * S))) + #updated_phi[s] <- updated_phi[s] + epsilon * ((2 * s - 1) / (2 * S) - zprime_delta / n_post_samples) # (tau_{i - 1} + tau_i) / S = (s / S + (s - 1) / S) / 2 + + #phi_delta <- c(phi_delta, ((2 * s - 1) / (2 * S) - zprime_delta / n_post_samples)) + } + iters <- iters + 1 + #message(updated_phi) + #message(paste(wasserstein(updated_phi, phi_post), wass_last)) + } + print(paste("optimization iters:", iters)) + return(list(updated_phi=posterior::rvar(array(rep(updated_phi, each = nsims), dim = c(nsims, nsims))), phi=phi)) # currently all nsims receive same updated mus +} diff --git a/R/util.R b/R/util.R index 7038e64..c28d7ae 100644 --- a/R/util.R +++ b/R/util.R @@ -14,6 +14,89 @@ combine_args <- function(args1, args2) { } } +##' Transform parameters from constrained to uncontrained +##' +##' @param param `draws_rvars` type parameter values +##' @return list of uncontrained parameters and transformation type +##' @export +tf_param <- function(param){ + tf <- list() + for (tv in names(param)){ + if(all(param[[tv]] > 0) & all(param[[tv]] < 1)){ + tf[[tv]] <- "logit" + param[[tv]] <- gtools::logit(param[[tv]]) + }else if(all(param[[tv]] > 0)){ + tf[[tv]] <- "log" + param[[tv]] <- log(param[[tv]]) + } + } + return (list(param = param, tf = tf)) +} + +##' Transform parameters from constrained to uncontrained +##' +##' @param param a vector +##' @param tf string indicating transformation type +##' @return list containing uncontrained parameters and transformation type +##' @export +tf_param_vec <- function(param, tf){ + if(is.null(tf) || missing(tf)){ + param <- param + } + else if(tf == "logit"){ + param <- gtools::logit(param) + }else if(tf == "log"){ + param <- log(param) + } + return (param) +} + +##' Inverse transform parameters from uncontrained to constrained +##' +##' @param param a vector +##' @param link_type int indicating link type +##' @return constrained parameter vector +##' @export +invtf_param_vec <- function(param, link_type){ + if(is.null(link_type) || missing(link_type)){ + param <- param + } + else if(link_type == 1){ + param <- brms:::inv_logit(param) + } else if (link_type == 2) { + param = dnorm(param) + } else if (link_type == 3) { + param = 
brms:::inv_cloglog(eta) + } + param +} + + +#'Maximal coupling of two univariate Normal distributions +#'from https://github.com/pierrejacob/debiasedhmc/blob/1a2eeeb041eea4e5c050e5188e7100f31e61e35b/R/gaussian_couplings.R +#'@description Sample from maximal coupling of two univariate Normal distributions, +#'specified through their means and standard deviations. +#'@param mu1 mean of first distribution +#'@param mu2 mean of second distribution +#'@param sigma1 standard deviation of first distribution +#'@param sigma2 standard deviation of second distribution +#' +#'@export +rnorm_max_coupling <- function(mu1, mu2, sigma1, sigma2){ + x <- rnorm(1, mu1, sigma1) + if (dnorm(x, mu1, sigma1, log = TRUE) + log(runif(1)) < dnorm(x, mu2, sigma2, log = TRUE)){ + return(c(x,x)) + } else { + reject <- TRUE + y <- NA + while (reject){ + y <- rnorm(1, mu2, sigma2) + reject <- (dnorm(y, mu2, sigma2, log = TRUE) + log(runif(1)) < dnorm(y, mu1, sigma1, log = TRUE)) + } + return(c(x,y)) + } +} + SBC_error <- function(subclass, message, call = sys.call(-1), ...) { structure( diff --git a/README.md b/README.md index ad8407b..1da10e8 100755 --- a/README.md +++ b/README.md @@ -1,59 +1,122 @@ # Simulation-based Calibration: SBC -## Efficient simulation-based calibration for Bayesian models -SBC provides tools to easily validate and offer corrections on prior, likelihood, and computation algorithms based on the self-recovering property of Bayesian models. This package contains tools such as SBC rank histograms, ECDF plots, and their summary statistics which can be used to assess computational faithfulness. -### Varieties of calibrations: scope of this package -Calibration (i.e. reliability) is not a sufficient condition for a good forecast but a minimal property that any forecast should satisfy (FV1998). It serves as a bootstrap for model development and its method and target varies. Target is chosen as modeler's quantity of interest and directly affects the calibrated result as reward in reinforcement learning. Method depends on how much you marginalized or conditioned the full joint space to test coverage. Scope of this package is checked below. +SBC provides tools to validate your Bayesian model and/or a sampling algorithm via the self-recovering property of Bayesian models. This package lets you run SBC easily and perform postprocessing and visualisations of the results to assess computational faithfulness. -## Interface and Usage +## Installation -SBC is designed to be primarily used with [Stan](https://mc-stan.org/) models, offering a highly customizable interface to integrate Simulation Based Calibration into existing Bayesian workflows with minimal effort. Its main feature is the `api` interface, which defines a fully-blown SBC pipeline starting from dataset generation to posterior sampling. 
Once a user has a valid Stan model and a minor R function defining the data generating process(referred to as `Generator`), running SBC becomes as simple as: +To install the development version of SBC, run +```r +devtools::install_github("hyunjimoon/SBC") ``` -n_datasets <- 100 # Number of SBC iterations to run - -sbc_generator <- SBC::function_SBC_generator(Generator) -sbc_dataset <- SBC::generate_datasets( - sbc_generator, - n_datasets) - -cmdstan_backend <- SBC::cmdstan_sample_SBC_backend( - cmdstan_model, iter_warmup = 1000, iter_sampling = 1000) - -results <- SBC::compute_results(sbc_dataset, cmdstan_backend) -plot_rank_hist(results) + + +## Quick tour + +To use SBC, you need a piece of code that generates simulated data that should +match your model (a _generator_) and a statistical model + algorithm + +algorithm parameters that can fit the model to data (a _backend_). SBC then lets you +discover when the backend and generator don't encode the same data generating process +(up to [certain limitations](https://hyunjimoon.github.io/SBC/articles/limits_of_SBC.html)). + +For a quick example, we'll use a simple generator producing normally-distributed +data (basically `y <- rnorm(N, mu, sigma)`) with a backend in Stan that mismatches +the generator by wrongly assuming Stan parametrizes the normal distribution via +precision (i.e. it has `y ~ normal(mu, 1 / sigma ^ 2)`). + +```r +library(SBC) +gen <- SBC_example_generator("normal") +# interface = "cmdstanr" or "rjags" is also supported +backend_bad <- SBC_example_backend("normal_bad", interface = "rstan") ``` -For detailed usage, please refer to the included vignettes. +_Note: Using the `cmdstanr` interface, a small number of rejected steps will be reported. Those are false positives and do not threaten validity (they happen during warmup). This is a result of difficulties in parsing the output of `cmdstanr`. We are working on a resolution._ -### Compatibility -Currently `SBC` supports `cmdstan`, `rstan`, and `brms` models out of the box. However, adding Backends for other platforms is supported. +You can use `SBC_print_example_model("normal_bad")` to inspect the model used. -## Installation -To install the development version of SBC, run +We generate 50 simulated datasets and perform SBC: + +```r +ds <- generate_datasets(gen, n_sims = 50) +results_bad <- compute_SBC(ds, backend_bad) ``` -devtools::install_github("hyunjimoon/SBC") + +The results then give us diagnostic plots that immediately show a problem: +the distribution of SBC ranks is not uniform as witnessed by both the rank histogram +and the difference between sample ECDF and the expected deviations from theoretical CDF. + +```r +plot_rank_hist(results_bad) +plot_ecdf_diff(results_bad) +``` + +We can then run SBC with a backend that uses the correct parametrization +(i.e. with `y ~ normal(mu, sigma)`): + +```r +backend_sd <- SBC_example_backend("normal_sd", interface = "rstan") +results_sd <- compute_SBC(ds, backend_sd) + +plot_rank_hist(results_sd) +plot_ecdf_diff(results_sd) +``` + +The diagnostic plots show no problems in this case. As with any other +software test, we can observe clear failures, but absence of failures does not imply +correctness. We can however make the SBC check more thorough by using a lot of +simulations and including suitable generated quantities to guard against +[known limitations of vanilla SBC](https://hyunjimoon.github.io/SBC/articles/limits_of_SBC.html). 
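+
+If this first check passes, a natural next step is to make it more severe by simulating
+more datasets and pooling the results. A rough sketch, reusing the objects from the
+quick tour above (`bind_results` combines several SBC results objects):
+
+```r
+ds_more <- generate_datasets(gen, n_sims = 200)
+results_more <- compute_SBC(ds_more, backend_sd)
+results_all <- bind_results(results_sd, results_more)
+plot_rank_hist(results_all)
+plot_ecdf_diff(results_all)
+```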
+ +## Paralellization + +The examples above are very fast to compute, but in real use cases, +you almost certainly want to let the computation run in parallel via the +[`future`](https://future.futureverse.org/) package. + +```r +library(future) +plan(multisession) ``` -from your R console. -### References: -Theoretical support -* [Validating Bayesian Inference Algorithms with Simulation-Based Calibration](https://arxiv.org/pdf/1804.06788.pdf) Talts, Betancourt, Simpson, Vehtari, Gelman, 2018 -* [Graphical Test for Discrete Uniformity and its Applications in Goodness of Fit Evaluation and Multiple Sample Comparison](https://arxiv.org/abs/2103.10522) Säilynoja, Bürkner, Vehtari, 2021 -* [Bayesian Workflow](https://arxiv.org/abs/2011.01808), Gelman et al., 2020 -* [Toward a principled Bayesian workflow in cognitive science](https://psycnet.apa.org/record/2020-43606-001) Schad, Betancourt, Vasishth, 2021 -* [Bayes factor workflow](https://arxiv.org/pdf/2103.08744.pdf) Schad, Nicenboim, Bürkner, Betancourt, Vasishth, 2021 +## More information + +The [package vignettes](https://hyunjimoon.github.io/SBC/articles/) provide +additional context and examples. Notably: -Application support -* [Cognitive science, response time fitting](https://link.springer.com/content/pdf/10.3758/s13428-019-01318-x.pdf) -* [Bioinformatics, effect of mutation prediction](https://www.biorxiv.org/content/10.1101/2020.10.27.356758v1.full.pdf) -* [Earth science, earthquake prediction](https://gmd.copernicus.org/articles/11/4383/2018/gmd-11-4383-2018.pdf ) -* [Sequential Neural Likelihood](http://proceedings.mlr.press/v89/papamakarios19a/papamakarios19a.pdf) +- [The main vignette](https://hyunjimoon.github.io/SBC/articles/SBC.html) +has more theoretical background and instructions how to integrate your own simulation code and +models with SBC. +- [Small model workflow](https://hyunjimoon.github.io/SBC/articles/small_model_workflow.html) +discusses how SBC integrates with model implementation workflow and how you can +use SBC to safely develop complex models step-by-step. -Vignette -* [ECDF with codes](https://avehtari.github.io/rhat_ess/rhat_ess.html) (new implementation by Teemu Säilynoja will be available in `bayesplot` and `SBC` package soon) +Currently `SBC` supports `cmdstanr`, `rstan`, and `brms` models out of the box. +With a little additional work, you can integrate SBC with any exact or approximate fitting method as shown in the [Implementing backends vignette](https://hyunjimoon.github.io/SBC/articles/implementing_backends.html). 
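+
+To give a flavour of what that involves: a backend boils down to a pair of S3 methods,
+one that fits a single simulated dataset and one that turns the resulting fit into a
+matrix of posterior draws. The sketch below is illustrative only: the generic names
+exist in the package, but the argument names and the toy "sampler" are assumptions,
+so consult the vignette above for the documented interface.
+
+```r
+# Illustrative sketch only, not the documented API.
+SBC_backend_toy_normal <- function(n_draws = 1000) {
+  structure(list(n_draws = n_draws), class = "SBC_backend_toy_normal")
+}
+
+SBC_fit.SBC_backend_toy_normal <- function(backend, generated, cores) {
+  # "Fit" by drawing around the sample mean of the simulated data.
+  draws <- posterior::draws_matrix(
+    mu = rnorm(backend$n_draws, mean(generated$y), 1 / sqrt(length(generated$y))))
+  structure(list(draws = draws), class = "toy_normal_fit")
+}
+
+SBC_fit_to_draws_matrix.toy_normal_fit <- function(fit) {
+  fit$draws
+}
+```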
+ + + +## References + +* Theoretical support + * [Validating Bayesian Inference Algorithms with Simulation-Based Calibration](https://arxiv.org/pdf/1804.06788.pdf) Talts, Betancourt, Simpson, Vehtari, Gelman, 2018 + * [Graphical Test for Discrete Uniformity and its Applications in Goodness of Fit Evaluation and Multiple Sample Comparison](https://arxiv.org/abs/2103.10522) Säilynoja, Bürkner, Vehtari, 2021 + * [Bayesian Workflow](https://arxiv.org/abs/2011.01808), Gelman et al., 2020 + * [Toward a principled Bayesian workflow in cognitive science](https://psycnet.apa.org/record/2020-43606-001) Schad, Betancourt, Vasishth, 2021 + * [Bayes factor workflow](https://arxiv.org/pdf/2103.08744.pdf) Schad, Nicenboim, Bürkner, Betancourt, Vasishth, 2021 + +* Application support + * [Cognitive science, response time fitting](https://link.springer.com/content/pdf/10.3758/s13428-019-01318-x.pdf) + * [Bioinformatics, effect of mutation prediction](https://www.biorxiv.org/content/10.1101/2020.10.27.356758v1.full.pdf) + * [Earth science, earthquake prediction](https://gmd.copernicus.org/articles/11/4383/2018/gmd-11-4383-2018.pdf ) + * [Sequential Neural Likelihood](http://proceedings.mlr.press/v89/papamakarios19a/papamakarios19a.pdf) ## FAQ + > How does calibration relate to prediction accuracy? Comparing the ground truth and the simulated result is a backbone of calibration and comparison target greatly affects the calibrated (i.e. trained) result, similar to reward in reinforcement learning. In this sense, if the U(a(y), theta) term is designed for prediction, the model will be calibrated to have best predictive result as possible. + +## Acknowledgements + +Development of this package was supported by [ELIXIR CZ](https://www.elixir-czech.cz/) research infrastructure project (Ministry of Youth, Education and Sports of the Czech Republic, Grant No: LM2018131) including access to computing and storage facilities. diff --git a/_pkgdown.yml b/_pkgdown.yml index 6652d75..79ac2b3 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -1,11 +1,14 @@ +url: https://hyunjimoon.github.io/SBC/ template: params: bootswatch: cosmo navbar: - title: "brms" + title: "SBC" left: + - text: "Get Started" + href: articles/SBC.html - text: "Vignettes" href: articles/index.html - text: "Functions" @@ -52,7 +55,8 @@ articles: desc: > Understanding the basic usage and philosophy of the package contents: - - basic_usage + - SBC + - rank_visualizations - title: "Using SBC for debugging/validating Stan models" desc: > Case studies showing how problems in a Stan model can be discovered with SBC. @@ -66,7 +70,7 @@ articles: - computational_algorithm1 - implementing_backends - brms - - discrete_params + - discrete_vars - rejection_sampling reference: @@ -84,20 +88,26 @@ reference: - title: Computation & results desc: Functions related to running the SBC computation and handling the results. 
- contents: - - compute_results + - compute_SBC + - contains("SBC_results") - generated_quantities - - statistics_from_single_fit - - recompute_statistics - - contains("results") + - SBC_statistics_from_single_fit + - recompute_SBC_statistics + - bind_results - calculate_ranks_draws_matrix - contains("diagnostic") - - starts_with("default_") + - default_chunk_size + - default_cores_per_fit - title: Plotting & Summarising desc: Plotting and summarising results - contents: - contains("plot") - - guess_bins + - guess_rank_hist_bins - empirical_coverage +- title: Examples + desc: Functions to let you easily test the pacakge +- contents: + - contains("example") - title: Miscellaneous - contents: - wasserstein diff --git a/docs/404.html b/docs/404.html index c361cab..8c2727e 100644 --- a/docs/404.html +++ b/docs/404.html @@ -1,66 +1,27 @@ - - - - + + + + - Page not found (404) • SBC - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + - - - - -
-
- + +
+ + + - - -
+
+
-
- +
+ + - - diff --git a/docs/LICENSE-text.html b/docs/LICENSE-text.html index b6e8733..68f4eb4 100644 --- a/docs/LICENSE-text.html +++ b/docs/LICENSE-text.html @@ -1,66 +1,12 @@ - - - - - - - -License • SBC - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -License • SBC + + - - - - -
-
- -
- -
+
+
-
- +
- - + + diff --git a/docs/LICENSE.html b/docs/LICENSE.html index af3e1d9..2ea7d03 100644 --- a/docs/LICENSE.html +++ b/docs/LICENSE.html @@ -1,66 +1,12 @@ - - - - - - - -MIT License • SBC - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -MIT License • SBC + + - - - - -
-
- -
- -
+
+
-
- +
- - + + diff --git a/docs/apple-touch-icon-120x120.png b/docs/apple-touch-icon-120x120.png deleted file mode 100644 index bcf2cda..0000000 Binary files a/docs/apple-touch-icon-120x120.png and /dev/null differ diff --git a/docs/apple-touch-icon-152x152.png b/docs/apple-touch-icon-152x152.png deleted file mode 100644 index 0ba0675..0000000 Binary files a/docs/apple-touch-icon-152x152.png and /dev/null differ diff --git a/docs/apple-touch-icon-180x180.png b/docs/apple-touch-icon-180x180.png deleted file mode 100644 index 5da4301..0000000 Binary files a/docs/apple-touch-icon-180x180.png and /dev/null differ diff --git a/docs/apple-touch-icon-60x60.png b/docs/apple-touch-icon-60x60.png deleted file mode 100644 index 06c19af..0000000 Binary files a/docs/apple-touch-icon-60x60.png and /dev/null differ diff --git a/docs/apple-touch-icon-76x76.png b/docs/apple-touch-icon-76x76.png deleted file mode 100644 index ed72332..0000000 Binary files a/docs/apple-touch-icon-76x76.png and /dev/null differ diff --git a/docs/apple-touch-icon.png b/docs/apple-touch-icon.png deleted file mode 100644 index 89a0306..0000000 Binary files a/docs/apple-touch-icon.png and /dev/null differ diff --git a/docs/articles/basic_usage.html b/docs/articles/SBC.html similarity index 51% rename from docs/articles/basic_usage.html rename to docs/articles/SBC.html index baa1838..ae2fe4b 100644 --- a/docs/articles/basic_usage.html +++ b/docs/articles/SBC.html @@ -5,13 +5,13 @@ -SBC Interface Introduction • SBC +Getting Started with SBC • SBC - + + +
+
-
-

-What is SBC?

-

SBC stands for “simulation-based calibration” and it is a tool to validate statistical models and/or algorithms fitting those models. In SBC we are given a statistical model, a method to generate samples from the prior predictive distribution (i.e. generate simulated datasets that match the model’s priors + likelihood) and an algorithm that fits the model to data.

-

The rough sketch of SBC is that we simulate some datasets and then for each dataset:

+
+

What is SBC? +

+

SBC stands for “simulation-based calibration” and it is a tool to validate statistical models and/or algorithms fitting those models. In SBC we are given a statistical model, a method to generate draws from the prior predictive distribution (i.e. generate simulated datasets that match the model’s priors + likelihood) and an algorithm that fits the model to data.

+

The rough sketch of SBC is that we simulate some datasets and then for each simulated dataset:

    -
  1. Fit the model and obtain \(S\) independent samples from the posterior.
  2. -
  3. For each parameter, take the rank of the simulated parameter value within the posterior samples +
  4. Fit the model and obtain \(D\) independent draws from the posterior.
  5. +
  6. For each variable of interest, take the rank of the simulated value within the posterior draws
      -
    • Where rank is defined as the number of samples < simulated value
    • +
    • Where rank is defined as the number of draws < simulated value
-

It can be shown that if model matched the generator and algorithm works correctly, then for each parameter, the ranks obtained in SBC should be uniformly distributed between \(0\) and \(S\). This corresponds quite directly to claims like “the posterior 84% credible interval should contain the simulated value in 84% of simulations”, the rank uniformity represents this claim for all interval widths at once. The theory of SBC is fully described in Talts et al.

+

It can be shown that if the model matches the generator and the algorithm works correctly, then for each variable, the ranks obtained in SBC should be uniformly distributed between \(0\) and \(D\). This corresponds quite directly to claims like “the posterior 84% credible interval should contain the simulated value in 84% of simulations”; the rank uniformity represents this claim for all interval widths at once (a small self-contained illustration follows below). The theory of SBC is fully described in Talts et al.
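A toy illustration of this uniformity claim in plain R (this is not package code; a conjugate normal model is used only because its posterior is known exactly):

set.seed(1)
ranks <- replicate(1000, {
  mu_sim <- rnorm(1, 0, 1)               # draw the "true" value from the prior
  y <- rnorm(5, mu_sim, 1)               # simulate data given that value
  # exact posterior for a normal mean with known sd = 1 and a N(0, 1) prior
  post_mean <- sum(y) / (length(y) + 1)
  post_sd <- sqrt(1 / (length(y) + 1))
  draws <- rnorm(100, post_mean, post_sd)
  sum(draws < mu_sim)                     # the SBC rank
})
hist(ranks)  # approximately uniform on 0..100 when everything matches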

This opens two principal use-cases of SBC:

  1. We have an algorithm that we trust is correct and a generator, and we want to check that we correctly implemented our Bayesian model.
  2. We have a generator and a model we trust and we want to check whether a given algorithm correctly computes the posterior.

In any case, a failure of SBC only tells us that at least one of the three pillars of our inference (generator/model/algorithm) is mismatched to the others.

-

In the context of larger Bayesian workflow (as discussed e.g. in Bayesian Workflow by Gelman et al.  or Towards A Principled Bayesian Workflow by Betancourt), SBC can be used to validate the implementation of a model/algorithm, which is just one of many things to check if one needs a robust analysis. In particular, SBC does not use any real data and thus cannot tell you anything about potential mismatch between your model and the actual data you plan to analyze. However, this is in some sense an advantage: if our model fails (e.g. we have convergence problems) on real data, we don’t know whether the problem is a bug in our model or a mismatch between the model and the data. If we simulate data exactly as the model assumes, any problem has to be a bug. Additionally, we can use SBC to better understand whether the data we plan to collect are actually capable of answering the questions we have.

+

In the context of larger Bayesian workflow (as discussed e.g. in Bayesian Workflow by Gelman et al.  or Towards A Principled Bayesian Workflow by Betancourt), SBC can be used to validate the implementation of a model/algorithm, which is just one of many things to check if one needs a robust analysis. In particular, SBC does not use any real data and thus cannot tell you anything about potential mismatch between your model and the actual data you plan to analyze. However, this is in some sense an advantage: if our model fails (e.g. we have convergence problems) on real data, we don’t know whether the problem is a bug in our model or a mismatch between the model and the data. If we simulate data exactly as the model assumes, any problem has to be a bug. Additionally, we can use SBC to better understand whether the data we plan to collect are actually capable of answering the questions we have.

This vignette will demonstrate how the basic package interface can be used to run simulations, calculate ranks and investigate calibration.

-
-

-Aims of the package

-

The SBC package aims to provide a richer and more usable alternative to rstan::sbc(). The main design goals is to make it easy to incorporate SBC in your everyday modelling workflow. To this end:

+
+

Aims of the package +

+

The SBC package aims to provide a richer and more usable alternative to rstan::sbc(). The main design goal is to make it easy to incorporate SBC in your everyday modelling workflow. To this end:

  • No changes to your model are needed to test it with SBC.
  • Once you have your model and code to simulate data ready, it is easy to gradually move from 1 simulation to check your model does not crash to 1000 simulations that can resolve even small inaccuracies.
  • @@ -164,53 +169,73 @@

    We intentionally do not focus on mechanisms that would let you automatically construct a simulator just from your model: if we did that, any bugs in your model would automatically carry over to the simulator and the SBC would only check that the algorithm works. Instead we believe it is good practice to implement the simulator in the most easy way possible while altering aspects of the implementation that should not matter (e.g. for loops vs. matrix multiplication). The best solution would be to have one person write the simulator and a different person the model (though that would often be impractical). This way you get two independent pieces of code that should correspond to the same data generating process and it becomes less likely that there is the same mistake in both versions. A mistake that is in just one version can then be (at least in principle) caught by SBC.

    This is actually a well-known pattern in software safety: critical components in airplanes are required to have two completely independent implementations of the same software (or even hardware) and the system checks that both produce the same output for the same input. Similarly, pharmaceutical companies analyzing drug trials are required to have the data analysis pipeline written by two separate teams and the results of both must match (this is not required for academic trials - who would need safety there, right?). The main reason this method is used relatively rarely is that implementing the same thing twice is costly. But statistical models are usually relatively small pieces of code and the added cost of the second implementation (the generator) thus tends to be very small.

-
-

-Overview of the Architecture

+
+

Naming +

+

To avoid confusion the package and the docs tries to consistently give the same meaning to the following potentially ambiguous words:

+
    +
  • +variable All quantities of interest for SBC - this includes both parameters that are directly estimated by the model and quantities derived from those parameters.
  • +
  • +draws are assumed to come from either a single realized posterior distribution of a fitted model or the prior distribution of the model. The number of draws ( n_draws) is the number of posterior draws produced by fitting the model.
  • +
  • +simulation / sim a set of simulated values for all variables and the accompanying generated data. I.e. the number of simulations (n_sims) is the number of times an individual model is fitted
  • +
  • +fit represents the result of fitting a single simulation
  • +
+
+
+

Overview of the Architecture +

Overview of the package structure

-

To perform SBC, one needs to first generate simulated datasets and then fit the model to those datasets. The SBC_datasets object holds the simulated prior and data samples. SBC_datasets objects can be created directly by the user, but it is often easier to use one of provided Generator implementations that let you e.g. wrap a function that returns the parameters and simulated data for a single dataset or use a brms specification to generate samples corresponding to a given brms model.

-

The other big part of the process is a backend. The SBC package uses a backend object to actually fit the model to the simulated data and generate posterior samples. In short, backend bunches together the algorithm in which inference is ran (cmdstanr, rstan, brms, jags, etc.), the model, and additional platform-specific inference parameters which are necessary to run inference for the model-platform combination (e.g. number of iterations, initial values, …). In other words backend is a function that takes data as its only input and provides posterior samples.

-

Once we have a backend and an SBC_datasets instance, we can call compute_results to actually perform the SBC. The resulting object can then be passed to various plotting and summarising functions to let us easily learn if our model works as expected.

+

To perform SBC, one first needs to generate simulated datasets and then fit the model to those simulations. The SBC_datasets object holds the simulated prior and data draws. SBC_datasets objects can be created directly by the user, but it is often easier to use one of the provided Generator implementations that let you e.g. wrap a function that returns the variables and observed data for a single simulation, or use a brms specification to generate draws corresponding to a given brms model.

+

The other big part of the process is a backend. The SBC package uses a backend object to actually fit the model to the simulated data and generate posterior draws. In short, a backend bundles together the algorithm in which inference is run (cmdstanr, rstan, brms, jags, etc.), the model, and additional platform-specific inference parameters which are necessary to run inference for the model-platform combination (e.g. number of iterations, initial values, …). In other words, a backend is a function that takes data as its only input and provides posterior draws.

+

Once we have a backend and an SBC_datasets instance, we can call compute_SBC to actually perform the SBC. The resulting object can then be passed to various plotting and summarising functions to let us easily learn if our model works as expected.

-
-

-Simple Poisson Regression

+
+

Simple Poisson Regression +

In this vignette we will demonstrate how the interface is used with a simple poisson model. First we’ll setup and configure our environment.

-library(SBC);
+library(SBC);
 
-use_cmdstanr <- TRUE # Set to false to use rstan instead
+use_cmdstanr <- getOption("SBC.vignettes_cmdstanr", TRUE) # Set to false to use rstan instead
 
 if(use_cmdstanr) {
-  library(cmdstanr)
+  library(cmdstanr)
 } else {
-  library(rstan)
+  library(rstan)
+  rstan_options(auto_write = TRUE)
 }
 
-options(mc.cores = parallel::detectCores())
+options(mc.cores = parallel::detectCores())
 
 # Enabling parallel processing via future
-library(future)
-plan(multisession)
+library(future)
+plan(multisession)
 
 # The fits are very fast,
 # so we force a minimum chunk size to reduce overhead of
 # paralellization and decrease computation time.
-options(SBC.min_chunk_size = 5)
+options(SBC.min_chunk_size = 5)
 
 # Setup caching of results
-cache_dir <- "./basic_usage_SBC_cache"
-if(!dir.exists(cache_dir)) {
-  dir.create(cache_dir)
+if(use_cmdstanr) {
+  cache_dir <- "./_basic_usage_SBC_cache"
+} else {
+  cache_dir <- "./_basic_usage_rstan_SBC_cache"
+}
+if(!dir.exists(cache_dir)) {
+  dir.create(cache_dir)
 }
-
-

-Model Setup

+
+

Model Setup +

We will be running SBC against a model that defines y ~ Poisson(lambda), where lambda ~ Gamma(15, 5). We will use the following Stan model:

-cat(readLines("stan/poisson.stan"), sep = "\n")
+cat(readLines("stan/poisson.stan"), sep = "\n")
data{
   int N;
   int y[N];
@@ -224,49 +249,49 @@ 

}

 if(use_cmdstanr) {
-  cmdstan_model <- cmdstanr::cmdstan_model("stan/poisson.stan")
+  cmdstan_model <- cmdstanr::cmdstan_model("stan/poisson.stan")
 } else {
-  rstan_model <- rstan::stan_model("stan/poisson.stan")
+  rstan_model <- rstan::stan_model("stan/poisson.stan")
 }
-
-

-Generator

-

Once we have defined the model, we can create a generator function which will generate prior and data samples:

+
+

Generator +

+

Once we have defined the model, we can create a generator function which will generate prior and data draws:

-# A generator function should return a named list containing elements "parameters" and "generated"
+# A generator function should return a named list containing elements "variables" and "generated"
 
 poisson_generator_single <- function(N){  # N is the number of data points we are generating
-  lambda <- rgamma(n = 1, shape = 15, rate = 5)
-  y <- rpois(n = N, lambda = lambda)
-  list(
-    parameters = list(
+  lambda <- rgamma(n = 1, shape = 15, rate = 5)
+  y <- rpois(n = N, lambda = lambda)
+  list(
+    variables = list(
       lambda = lambda
     ),
-    generated = list(
+    generated = list(
       N = N,
       y = y
     )
   )
 }
-

As you can see, the generator returns a named list containing random samples from the prior and generated data realized from the prior samples - the data are already in the format expected by Stan.

+

As you can see, the generator returns a named list containing random draws from the prior and generated data realized from the prior draws - the data are already in the format expected by Stan.
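For illustration, calling the generator once shows this structure directly (the particular values will differ from run to run):

str(poisson_generator_single(N = 5))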

-
-

-Create SBC_Datasets from generator

+
+

Create SBC_datasets from generator +

SBC provides helper functions SBC_generator_function and generate_datasets which takes a generator function and calls it repeatedly to create a valid SBC_datasets object.

-set.seed(54882235)
-n_datasets <- 100  # Number of SBC iterations to run
+set.seed(54882235)
+n_sims <- 100  # Number of SBC iterations to run
 
 poisson_generator <- SBC_generator_function(poisson_generator_single, N = 40)
 poisson_dataset <- generate_datasets(
   poisson_generator, 
-  n_datasets)
+ n_sims)
-
-

-Defining backend

+
+

Defining backend +

Once we have the model compiled we’ll create a backend object from the model. SBC includes pre-defined backend objects for HMC sampling with cmdstan and rstan. In addition, it also provides generator function and backend for brms based models.

Note that you can create your own backend if you wish to use a different sampling/optimization platform, such as variational inference or JAGS.

Here we’ll just use the pre-defined cmdstan backend, in which we pass our compiled model and any additional arguments we would like to pass over to the sampling method:

@@ -279,67 +304,72 @@

rstan_model, iter = 2000, warmup = 1000, chains = 2) }

-
-

-Computing Ranks

-

we can then use compute_results to fit our datasets with the backend:

+
+

Computing Ranks +

+

We can then use compute_SBC to fit our simulations with the backend:

-results <- compute_results(poisson_dataset, poisson_backend, 
+results <- compute_SBC(poisson_dataset, poisson_backend, 
                     cache_mode = "results", 
-                    cache_location = file.path(cache_dir, "results"))
-
## Results loaded from cache file 'results'
+ cache_location = file.path(cache_dir, "results"))
+
## Results loaded from cache file 'results'

Here we also use the caching feature to avoid recomputing the fits when recompiling this vignette. In practice, caching is not necessary but is often useful.

-
-

-Viewing Results

+
+

Viewing Results +

We can now inspect the results to see if there were any errors and check individual stats:

 results$stats
-
## # A tibble: 100 x 15
-##    dataset_id parameter simulated_value  rank z_score  mean median    sd   mad
-##         <int> <chr>               <dbl> <dbl>   <dbl> <dbl>  <dbl> <dbl> <dbl>
-##  1          1 lambda               1.44    14  -1.56   1.73   1.72 0.187 0.183
-##  2          2 lambda               2.99    75  -0.226  3.05   3.04 0.265 0.262
-##  3          3 lambda               2.51    44  -0.740  2.69   2.68 0.249 0.253
-##  4          4 lambda               2.86    59  -0.666  3.04   3.03 0.265 0.256
-##  5          5 lambda               2.97   182   1.28   2.66   2.66 0.244 0.244
-##  6          6 lambda               2.54   150   0.776  2.36   2.36 0.236 0.240
-##  7          7 lambda               2.65    17  -1.48   3.04   3.03 0.268 0.274
-##  8          8 lambda               2.99   126   0.319  2.91   2.90 0.258 0.263
-##  9          9 lambda               2.55    20  -1.22   2.87   2.86 0.255 0.250
-## 10         10 lambda               1.73     8  -1.56   2.05   2.04 0.209 0.211
-## # ... with 90 more rows, and 6 more variables: q5 <dbl>, q95 <dbl>, rhat <dbl>,
-## #   ess_bulk <dbl>, ess_tail <dbl>, max_rank <int>
+
## # A tibble: 100 x 15
+##    sim_id variable simulated_value  rank z_score  mean median    sd   mad    q5
+##     <int> <chr>              <dbl> <dbl>   <dbl> <dbl>  <dbl> <dbl> <dbl> <dbl>
+##  1      1 lambda              1.44    10  -1.46   1.74   1.73 0.202 0.200  1.42
+##  2      2 lambda              2.99    74  -0.226  3.05   3.04 0.265 0.262  2.63
+##  3      3 lambda              2.51    44  -0.740  2.69   2.68 0.249 0.253  2.29
+##  4      4 lambda              2.86    59  -0.666  3.04   3.03 0.265 0.256  2.63
+##  5      5 lambda              2.97   181   1.28   2.66   2.66 0.244 0.244  2.27
+##  6      6 lambda              2.54   149   0.776  2.36   2.36 0.236 0.240  1.97
+##  7      7 lambda              2.65    17  -1.48   3.04   3.03 0.268 0.274  2.61
+##  8      8 lambda              2.99   125   0.319  2.91   2.90 0.258 0.263  2.49
+##  9      9 lambda              2.55    20  -1.22   2.87   2.86 0.255 0.250  2.46
+## 10     10 lambda              1.73     9  -1.54   2.05   2.04 0.211 0.205  1.70
+## # ... with 90 more rows, and 5 more variables: q95 <dbl>, rhat <dbl>,
+## #   ess_bulk <dbl>, ess_tail <dbl>, max_rank <int>
-
-

-Plots

-

And finally, we can plot the rank distribution to check if the ranks are uniformly distributed. We can check the rank histogram and ECDF plots:

+
+

Plots +

+

And finally, we can plot the rank distribution to check if the ranks are uniformly distributed. We can check the rank histogram and ECDF plots (see vignette("rank_visualizations") for a description of the plots):

 plot_rank_hist(results)
-

+

-plot_ecdf(results)
-

+plot_ecdf(results)
+

-plot_ecdf_diff(results)
-

+plot_ecdf_diff(results)
+

Since our simulator and model do match and Stan works well, we see that the plots don’t show any violation.

-
-

-Is SBC frequentist?

+
+

Is SBC frequentist? +

A bit of philosophy at the end - SBC is designed to test Bayesian models and/or algorithms, but it fits very well with standard frequentist ideas (and there is no shame about this). In fact, SBC can be understood as a very pure form of hypothesis testing as the “null hypothesis” that the ranks are uniformly distributed is completely well specified, can (beyond numerical error) actually hold exactly and we are conducting the test against a hypothesis of interest. SBC thus lets us follow a simple naive-Popperian way of thinking: we try hard to disprove a hypothesis (that our model + algorithm + generator is correct) and when we fail to disprove it, we can consider the hypothesis corroborated to the extent our test was severe. This is unlike many scientific applications of hypothesis testing where people use a rejection of the null hypothesis as evidence for alternative (which is usually not warranted).

We currently can’t provide a good theoretical understanding of the severity of a given SBC test, but obviously the more iterations and the narrower the confidence bands of the ecdf and ecdf_diff plots, the more severe the test. One can also use empirical_coverage() and plot_coverage() functions to investigate the extent of miscalibration that we cannot rule out given our results so far.

Alternatively, one can somewhat sidestep the discussions about philosophy of statistics and understand SBC as a probabilistic unit test for your model. In this view, SBC tests a certain identity that we expect to hold if our system is implemented correctly, similarly how one could test an implementation of an arithmetical system by comparing the results of computing \((x + y)^2\) and \(x^2 + 2xy + y^2\) - any mismatch means the test failed.
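In that deterministic analogy the whole test is a one-liner; SBC plays the same role for the probabilistic identity above:

x <- runif(10); y <- runif(10)
all.equal((x + y)^2, x^2 + 2 * x * y + y^2)  # TRUE when the two "implementations" agree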

-
-

-Where to go next?

+
+

Where to go next? +

You may want to explore short examples showing how SBC can be used to diagnose bad parametrization or indexing bugs or you may want to read through a longer example of what we consider best practice in model-building workflow.

Alternatively, you might be interested in the limits of SBC — the types of problems that are hard / impossible to catch with SBC and what can we do to guard against those.

+
+
+

Acknowledgements +

+

Development of this package was supported by ELIXIR CZ research infrastructure project (Ministry of Youth, Education and Sports of the Czech Republic, Grant No: LM2018131) including access to computing and storage facilities.

@@ -354,11 +384,13 @@

-

Site built with pkgdown 1.6.1.

+

+

Site built with pkgdown 2.0.1.

@@ -367,5 +399,7 @@

+ + diff --git a/docs/articles/SBC_files/figure-html/ecdf-1.png b/docs/articles/SBC_files/figure-html/ecdf-1.png new file mode 100644 index 0000000..28e80c8 Binary files /dev/null and b/docs/articles/SBC_files/figure-html/ecdf-1.png differ diff --git a/docs/articles/SBC_files/figure-html/ecdf_diff-1.png b/docs/articles/SBC_files/figure-html/ecdf_diff-1.png new file mode 100644 index 0000000..a77967d Binary files /dev/null and b/docs/articles/SBC_files/figure-html/ecdf_diff-1.png differ diff --git a/docs/articles/SBC_files/figure-html/rank_hist-1.png b/docs/articles/SBC_files/figure-html/rank_hist-1.png new file mode 100644 index 0000000..d22481a Binary files /dev/null and b/docs/articles/SBC_files/figure-html/rank_hist-1.png differ diff --git a/docs/articles/bad_parametrization.html b/docs/articles/bad_parametrization.html index 0196f7f..4e66c4a 100644 --- a/docs/articles/bad_parametrization.html +++ b/docs/articles/bad_parametrization.html @@ -19,6 +19,8 @@ + +
+
-

This vignette provides the example used as Exercise 2 of the SBC tutorial presented at SBC StanConnect. Feel free to head to the tutorial website to get an interactive version and solve the problems yourself.

+

This vignette provides the example used as Exercise 2 of the SBC tutorial presented at SBC StanConnect. Feel free to head to the tutorial website to get an interactive version and solve the problems yourself.

Let’s setup the environment:

-library(SBC); 
-use_cmdstanr <- TRUE # Set to false to use rstan instead
+library(SBC); 
+use_cmdstanr <- getOption("SBC.vignettes_cmdstanr", TRUE) # Set to false to use rstan instead
 
 if(use_cmdstanr) {
-  library(cmdstanr)
+  library(cmdstanr)
 } else {
-  library(rstan)
+  library(rstan)
+  rstan_options(auto_write = TRUE)
 }
 
-options(mc.cores = parallel::detectCores())
+options(mc.cores = parallel::detectCores())
 
 # Uncomment below to have the fits evaluated in parallel
 # However, as this example evaluates just a few fits, it
@@ -153,13 +159,17 @@ 

2021-09-26

# Setup caching of results -cache_dir <- "./bad_parametrization_SBC_cache" -if(!dir.exists(cache_dir)) { - dir.create(cache_dir) +if(use_cmdstanr) { + cache_dir <- "./_bad_parametrization_SBC_cache" +} else { + cache_dir <- "./_bad_parametrization_rstan_SBC_cache" +} +if(!dir.exists(cache_dir)) { + dir.create(cache_dir) }
-

Premise: we mistakenly assume that Stan does parametrize the Gamma distribution via shape and scale, when in fact Stan uses the shape and rate parametrization (see Gamma distribution on Wikipedia for details on the parametrizations.

+

Premise: we mistakenly assume that Stan parametrizes the Gamma distribution via shape and scale, when in fact Stan uses the shape and rate parametrization (see Gamma distribution on Wikipedia for details on the parametrizations).
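As a quick R illustration (not part of the vignette's workflow), the shape/scale and shape/rate parametrizations only agree after inverting the scale:

# The same number used as "scale" vs. "rate" implies different distributions;
# the correct translation between the parametrizations is rate = 1 / scale.
shape <- 2; scale <- 3
c(mean_shape_scale = shape * scale,        # mean under shape/scale
  mean_if_misread_as_rate = shape / scale) # mean if the same value is used as rate
all.equal(dgamma(1.7, shape, scale = scale),
          dgamma(1.7, shape, rate = 1 / scale))  # TRUE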

-cat(readLines("stan/bad_parametrization1.stan"), sep = "\n")
+cat(readLines("stan/bad_parametrization1.stan"), sep = "\n")
data {
   int N;
   vector<lower=0>[N] y;
@@ -180,66 +190,66 @@ 

2021-09-26

iter_sampling <- 1000 if(use_cmdstanr) { - model_gamma <- cmdstan_model("stan/bad_parametrization1.stan") + model_gamma <- cmdstan_model("stan/bad_parametrization1.stan") backend_gamma <- SBC_backend_cmdstan_sample( model_gamma, iter_warmup = iter_warmup, iter_sampling = iter_sampling, chains = 2) } else { - model_gamma <- stan_model("stan/bad_parametrization1.stan") + model_gamma <- stan_model("stan/bad_parametrization1.stan") backend_gamma <- SBC_backend_rstan_sample( model_gamma, iter = iter_sampling + iter_warmup, warmup = iter_warmup, chains = 2) }

Build a generator to create simulated datasets.

-set.seed(21448857)
-n_datasets <- 10
+set.seed(21448857)
+n_sims <- 10
 
-single_dataset_gamma <- function(N) {
-  shape <- rlnorm(n = 1, meanlog =  0, sdlog = 1)
-  scale <- rlnorm(n = 1, meanlog = 0, sdlog = 1.5)
+single_sim_gamma <- function(N) {
+  shape <- rlnorm(n = 1, meanlog =  0, sdlog = 1)
+  scale <- rlnorm(n = 1, meanlog = 0, sdlog = 1.5)
   
-  y <- rgamma(N, shape = shape, scale = scale)
+  y <- rgamma(N, shape = shape, scale = scale)
   
-  list(
-    parameters = list(
+  list(
+    variables = list(
       shape = shape,
       scale = scale),
-    generated = list(
+    generated = list(
       N = N,
       y = y)
   )
 }
 
 
-generator_gamma <- SBC_generator_function(single_dataset_gamma, N = 40)
+generator_gamma <- SBC_generator_function(single_sim_gamma, N = 40)
 datasets_gamma <- generate_datasets(
   generator_gamma, 
-  n_datasets)
+ n_sims)
-results_gamma <- compute_results(datasets_gamma, backend_gamma, 
+results_gamma <- compute_SBC(datasets_gamma, backend_gamma, 
                     cache_mode = "results", 
-                    cache_location = file.path(cache_dir, "model1"))
-
## Results loaded from cache file 'model1'
-
##  - 10 (100%) fits had some steps rejected. Maximum number of rejections was 5.
-
## Not all diagnostics are OK.
-## You can learn more by inspecting $default_diagnostics, $backend_diagnostics 
-## and/or investigating $outputs/$messages/$warnings for detailed output from the backend.
+ cache_location = file.path(cache_dir, "model1"))
+
## Results loaded from cache file 'model1'
+
##  - 10 (100%) fits had some steps rejected. Maximum number of rejections was 5.
+
## Not all diagnostics are OK.
+## You can learn more by inspecting $default_diagnostics, $backend_diagnostics 
+## and/or investigating $outputs/$messages/$warnings for detailed output from the backend.

Here we also use the caching feature to avoid recomputing the fits when recompiling this vignette. In practice, caching is not necessary but is often useful.

-

10 simulations are enough to see something is wrong with the model. The problem is best seen on an ecdf_diff plot - we even see the issue is primarily with the scale parameter!

+

10 simulations are enough to see something is wrong with the model. The problem is best seen on an ecdf_diff plot - we even see the issue is primarily with the scale variable!

-plot_ecdf_diff(results_gamma)
+plot_ecdf_diff(results_gamma)

However, we can use the rank histogram (with a suitable number of bins) and the ecdf plot to show different visualisations of the same problem. The rank histogram often tends to be intuitively more understandable than the ecdf plots, but tweaking the number of bins is often necessary and the confidence interval is only approximate and has decreased sensitivity.

 plot_rank_hist(results_gamma, bins = 10)

-plot_ecdf(results_gamma)
+plot_ecdf(results_gamma)

So we see that the simulation does not match the model. In practice, the problem may lie with the simulation, with the model, or both. Here, we’ll assume that the simulation is correct - we really wanted to work with scale - and fix the model to match. I.e. we still represent scale in our model, but invert it to get the rate before using Stan’s gamma distribution:

-cat(readLines("stan/bad_parametrization2.stan"), sep = "\n")
+cat(readLines("stan/bad_parametrization2.stan"), sep = "\n")
data {
   int N;
   vector<lower=0>[N] y;
@@ -257,34 +267,34 @@ 

2021-09-26

}
 if(use_cmdstanr) {
-  model_gamma_2 <- cmdstan_model("stan/bad_parametrization2.stan")
+  model_gamma_2 <- cmdstan_model("stan/bad_parametrization2.stan")
 
   backend_gamma_2 <- SBC_backend_cmdstan_sample(
     model_gamma_2, iter_warmup = iter_warmup, iter_sampling = iter_sampling, chains = 2)
 } else {
-  model_gamma_2 <- stan_model("stan/bad_parametrization2.stan")
+  model_gamma_2 <- stan_model("stan/bad_parametrization2.stan")
 
   backend_gamma_2 <- SBC_backend_rstan_sample(
     model_gamma_2, iter = iter_sampling + iter_warmup, warmup = iter_warmup, chains = 2)
 }
-results_gamma2 <- compute_results(datasets_gamma, backend_gamma_2, 
+results_gamma2 <- compute_SBC(datasets_gamma, backend_gamma_2, 
                     cache_mode = "results", 
-                    cache_location = file.path(cache_dir, "model2"))
-
## Results loaded from cache file 'model2'
-
##  - 10 (100%) fits had some steps rejected. Maximum number of rejections was 9.
-
## Not all diagnostics are OK.
-## You can learn more by inspecting $default_diagnostics, $backend_diagnostics 
-## and/or investigating $outputs/$messages/$warnings for detailed output from the backend.
+ cache_location = file.path(cache_dir, "model2"))
+
## Results loaded from cache file 'model2'
+
##  - 10 (100%) fits had some steps rejected. Maximum number of rejections was 6.
+
## Not all diagnostics are OK.
+## You can learn more by inspecting $default_diagnostics, $backend_diagnostics 
+## and/or investigating $outputs/$messages/$warnings for detailed output from the backend.

No obvious problems here, but if we wanted to be sure, we should have run a lot more simulations - a sketch of how to extend the check follows the plots below.

-plot_ecdf_diff(results_gamma2)
+plot_ecdf_diff(results_gamma2)

 plot_rank_hist(results_gamma2, bins = 10)

-plot_ecdf(results_gamma2)
+plot_ecdf(results_gamma2)
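A minimal sketch of extending the check - we reuse the generator and backend defined above, fit additional simulations and merge the results via bind_results (the number of extra simulations and the seed are arbitrary):

set.seed(56855498)  # arbitrary seed for the extra simulations
datasets_gamma_more <- generate_datasets(generator_gamma, 100)
results_gamma2_all <- bind_results(
  results_gamma2,
  compute_SBC(datasets_gamma_more, backend_gamma_2))
# With the pooled results, the plots have higher sensitivity
plot_ecdf_diff(results_gamma2_all)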

@@ -297,11 +307,13 @@

2021-09-26

-

Site built with pkgdown 1.6.1.

+

+

Site built with pkgdown 2.0.1.

@@ -310,5 +322,7 @@

2021-09-26

+ + diff --git a/docs/articles/bad_parametrization_files/figure-html/results1_ecdf_diff-1.png b/docs/articles/bad_parametrization_files/figure-html/results1_ecdf_diff-1.png index af6e23a..f657f77 100644 Binary files a/docs/articles/bad_parametrization_files/figure-html/results1_ecdf_diff-1.png and b/docs/articles/bad_parametrization_files/figure-html/results1_ecdf_diff-1.png differ diff --git a/docs/articles/bad_parametrization_files/figure-html/results1_other-1.png b/docs/articles/bad_parametrization_files/figure-html/results1_other-1.png index e9f28a4..7e30f88 100644 Binary files a/docs/articles/bad_parametrization_files/figure-html/results1_other-1.png and b/docs/articles/bad_parametrization_files/figure-html/results1_other-1.png differ diff --git a/docs/articles/bad_parametrization_files/figure-html/results1_other-2.png b/docs/articles/bad_parametrization_files/figure-html/results1_other-2.png index f5a9848..72bfd2d 100644 Binary files a/docs/articles/bad_parametrization_files/figure-html/results1_other-2.png and b/docs/articles/bad_parametrization_files/figure-html/results1_other-2.png differ diff --git a/docs/articles/bad_parametrization_files/figure-html/results2_plots-1.png b/docs/articles/bad_parametrization_files/figure-html/results2_plots-1.png index 7943422..27017f9 100644 Binary files a/docs/articles/bad_parametrization_files/figure-html/results2_plots-1.png and b/docs/articles/bad_parametrization_files/figure-html/results2_plots-1.png differ diff --git a/docs/articles/bad_parametrization_files/figure-html/results2_plots-2.png b/docs/articles/bad_parametrization_files/figure-html/results2_plots-2.png index 8f77920..69c5d00 100644 Binary files a/docs/articles/bad_parametrization_files/figure-html/results2_plots-2.png and b/docs/articles/bad_parametrization_files/figure-html/results2_plots-2.png differ diff --git a/docs/articles/bad_parametrization_files/figure-html/results2_plots-3.png b/docs/articles/bad_parametrization_files/figure-html/results2_plots-3.png index 8e55bd9..953274c 100644 Binary files a/docs/articles/bad_parametrization_files/figure-html/results2_plots-3.png and b/docs/articles/bad_parametrization_files/figure-html/results2_plots-3.png differ diff --git a/docs/articles/bad_parametrization_files/header-attrs-2.10/header-attrs.js b/docs/articles/bad_parametrization_files/header-attrs-2.10/header-attrs.js deleted file mode 100644 index dd57d92..0000000 --- a/docs/articles/bad_parametrization_files/header-attrs-2.10/header-attrs.js +++ /dev/null @@ -1,12 +0,0 @@ -// Pandoc 2.9 adds attributes on both header and div. We remove the former (to -// be compatible with the behavior of Pandoc < 2.8). -document.addEventListener('DOMContentLoaded', function(e) { - var hs = document.querySelectorAll("div.section[class*='level'] > :first-child"); - var i, h, a; - for (i = 0; i < hs.length; i++) { - h = hs[i]; - if (!/^h[1-6]$/i.test(h.tagName)) continue; // it should be a header h1-h6 - a = h.attributes; - while (a.length > 0) h.removeAttribute(a[0].name); - } -}); diff --git a/docs/articles/bad_parametrization_files/header-attrs-2.11/header-attrs.js b/docs/articles/bad_parametrization_files/header-attrs-2.11/header-attrs.js deleted file mode 100644 index dd57d92..0000000 --- a/docs/articles/bad_parametrization_files/header-attrs-2.11/header-attrs.js +++ /dev/null @@ -1,12 +0,0 @@ -// Pandoc 2.9 adds attributes on both header and div. We remove the former (to -// be compatible with the behavior of Pandoc < 2.8). 
-document.addEventListener('DOMContentLoaded', function(e) { - var hs = document.querySelectorAll("div.section[class*='level'] > :first-child"); - var i, h, a; - for (i = 0; i < hs.length; i++) { - h = hs[i]; - if (!/^h[1-6]$/i.test(h.tagName)) continue; // it should be a header h1-h6 - a = h.attributes; - while (a.length > 0) h.removeAttribute(a[0].name); - } -}); diff --git a/docs/articles/basic_usage_files/figure-html/ecdf-1.png b/docs/articles/basic_usage_files/figure-html/ecdf-1.png deleted file mode 100644 index 103411f..0000000 Binary files a/docs/articles/basic_usage_files/figure-html/ecdf-1.png and /dev/null differ diff --git a/docs/articles/basic_usage_files/figure-html/ecdf_diff-1.png b/docs/articles/basic_usage_files/figure-html/ecdf_diff-1.png deleted file mode 100644 index f6477f4..0000000 Binary files a/docs/articles/basic_usage_files/figure-html/ecdf_diff-1.png and /dev/null differ diff --git a/docs/articles/basic_usage_files/figure-html/rank_hist-1.png b/docs/articles/basic_usage_files/figure-html/rank_hist-1.png deleted file mode 100644 index feba84c..0000000 Binary files a/docs/articles/basic_usage_files/figure-html/rank_hist-1.png and /dev/null differ diff --git a/docs/articles/basic_usage_files/header-attrs-2.10/header-attrs.js b/docs/articles/basic_usage_files/header-attrs-2.10/header-attrs.js deleted file mode 100644 index dd57d92..0000000 --- a/docs/articles/basic_usage_files/header-attrs-2.10/header-attrs.js +++ /dev/null @@ -1,12 +0,0 @@ -// Pandoc 2.9 adds attributes on both header and div. We remove the former (to -// be compatible with the behavior of Pandoc < 2.8). -document.addEventListener('DOMContentLoaded', function(e) { - var hs = document.querySelectorAll("div.section[class*='level'] > :first-child"); - var i, h, a; - for (i = 0; i < hs.length; i++) { - h = hs[i]; - if (!/^h[1-6]$/i.test(h.tagName)) continue; // it should be a header h1-h6 - a = h.attributes; - while (a.length > 0) h.removeAttribute(a[0].name); - } -}); diff --git a/docs/articles/basic_usage_files/header-attrs-2.11/header-attrs.js b/docs/articles/basic_usage_files/header-attrs-2.11/header-attrs.js deleted file mode 100644 index dd57d92..0000000 --- a/docs/articles/basic_usage_files/header-attrs-2.11/header-attrs.js +++ /dev/null @@ -1,12 +0,0 @@ -// Pandoc 2.9 adds attributes on both header and div. We remove the former (to -// be compatible with the behavior of Pandoc < 2.8). -document.addEventListener('DOMContentLoaded', function(e) { - var hs = document.querySelectorAll("div.section[class*='level'] > :first-child"); - var i, h, a; - for (i = 0; i < hs.length; i++) { - h = hs[i]; - if (!/^h[1-6]$/i.test(h.tagName)) continue; // it should be a header h1-h6 - a = h.attributes; - while (a.length > 0) h.removeAttribute(a[0].name); - } -}); diff --git a/docs/articles/brms.html b/docs/articles/brms.html index fc1752c..d7493a7 100644 --- a/docs/articles/brms.html +++ b/docs/articles/brms.html @@ -19,6 +19,8 @@ + +
+
@@ -132,42 +137,52 @@

2021-09-26

This vignette shows how the SBC package supports brms models. Let’s set up the environment:

-library(SBC)
-library(brms)
-library(ggplot2)
-options(brms.backend = "cmdstanr")
-# options(brms.backend = "rstan") # Uncomment to use rstan instead
+library(SBC)
+library(brms)
+library(ggplot2)
+
+use_cmdstanr <- getOption("SBC.vignettes_cmdstanr", TRUE) # Set to false to use rstan instead
+
+if(use_cmdstanr) {
+  options(brms.backend = "cmdstanr")
+} else {
+  options(brms.backend = "rstan") 
+  rstan::rstan_options(auto_write = TRUE)
+}
 
 # Using parallel processing
-library(future)
-plan(multisession)
+library(future)
+plan(multisession)
 
 # The fits are very fast,
 # so we force a minimum chunk size to reduce overhead of
 # paralellization and decrease computation time.
-options(SBC.min_chunk_size = 5)
+options(SBC.min_chunk_size = 5)
 
 # Setup caching of results
-cache_dir <- "./brms_SBC_cache"
-if(!dir.exists(cache_dir)) {
-  dir.create(cache_dir)
+if(use_cmdstanr) {
+  cache_dir <- "./_brms_SBC_cache"
+} else { 
+  cache_dir <- "./_brms_rstan_SBC_cache"
+}
+if(!dir.exists(cache_dir)) {
+  dir.create(cache_dir)
 }
-
-

-Generating data using brms +
+

Generating data using brms

The brms package has a built-in feature to simulate from the prior corresponding to the model via the sample_prior = "only" option. This is a bit less useful in model validation, as a bug in brms (or any mismatch between what brms does and what we think it does) cannot be found - it will most likely affect the generator and the backend in the same way. Still, this can be useful; to validate brms itself we need custom generators, which we’ll get to in a while. For now, we’ll build a generator using brms directly.
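As a side note, the brms mechanism the generator relies on is the sample_prior = "only" option; a rough, purely illustrative sketch of the direct call (using the template_data and priors objects defined a bit further below) could look like this:

# With sample_prior = "only" the likelihood is ignored, so the "posterior"
# draws are draws from the prior; outcomes can then be simulated from them.
prior_fit <- brm(y ~ x, data = template_data, prior = priors,
                 sample_prior = "only", chains = 1, refresh = 0)
prior_y <- posterior_predict(prior_fit)  # one row of simulated y per prior draw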

-

Generating datasets with this generator requires us to compile a Stan model and may thus take a while. Also the exploration is often problematic, so to avoid problems, we take a lot of samples and thin the resulting samples heavily.

+

Generating simulations with this generator requires us to compile a Stan model and may thus take a while. Also, the exploration is often problematic, so to avoid issues we take a lot of draws and thin the resulting draws heavily.

 # We need a "template dataset" to let brms build the model.
 # The predictor (x) values will be used for data generation,
 # the response (y) values will be ignored, but need to be present and 
 # of the correct data type
-set.seed(213452)
-template_data = data.frame(y = rep(0, 15), x = rnorm(15))
-priors <- prior(normal(0,1), class = "b") +
-  prior(normal(0,1), class = "Intercept") +
-  prior(normal(0,1), class = "sigma")
+set.seed(213452)
+template_data = data.frame(y = rep(0, 15), x = rnorm(15))
+priors <- prior(normal(0,1), class = "b") +
+  prior(normal(0,1), class = "Intercept") +
+  prior(normal(0,1), class = "sigma")
 generator <- SBC_generator_brms(y ~ x, data = template_data, prior = priors, 
                                 thin = 50, warmup = 10000, refresh = 2000,
                                 # Will generate the log density - this is useful, 
@@ -175,174 +190,174 @@ 

generate_lp = TRUE )

-set.seed(22133548)
+set.seed(22133548)
 datasets <- generate_datasets(generator, 100)
-
## Running MCMC with 1 chain...
-## 
-## Chain 1 Iteration:     1 / 15000 [  0%]  (Warmup) 
-## Chain 1 Iteration:  2000 / 15000 [ 13%]  (Warmup) 
-## Chain 1 Iteration:  4000 / 15000 [ 26%]  (Warmup) 
-## Chain 1 Iteration:  6000 / 15000 [ 40%]  (Warmup) 
-## Chain 1 Iteration:  8000 / 15000 [ 53%]  (Warmup) 
-## Chain 1 Iteration: 10000 / 15000 [ 66%]  (Warmup) 
-## Chain 1 Iteration: 10001 / 15000 [ 66%]  (Sampling) 
-## Chain 1 Iteration: 12000 / 15000 [ 80%]  (Sampling) 
-## Chain 1 Iteration: 14000 / 15000 [ 93%]  (Sampling) 
-## Chain 1 Iteration: 15000 / 15000 [100%]  (Sampling) 
-## Chain 1 finished in 0.2 seconds.
+
## Running MCMC with 1 chain...
+## 
+## Chain 1 Iteration:     1 / 15000 [  0%]  (Warmup) 
+## Chain 1 Iteration:  2000 / 15000 [ 13%]  (Warmup) 
+## Chain 1 Iteration:  4000 / 15000 [ 26%]  (Warmup) 
+## Chain 1 Iteration:  6000 / 15000 [ 40%]  (Warmup) 
+## Chain 1 Iteration:  8000 / 15000 [ 53%]  (Warmup) 
+## Chain 1 Iteration: 10000 / 15000 [ 66%]  (Warmup) 
+## Chain 1 Iteration: 10001 / 15000 [ 66%]  (Sampling) 
+## Chain 1 Iteration: 12000 / 15000 [ 80%]  (Sampling) 
+## Chain 1 Iteration: 14000 / 15000 [ 93%]  (Sampling) 
+## Chain 1 Iteration: 15000 / 15000 [100%]  (Sampling) 
+## Chain 1 finished in 0.2 seconds.
+
## Warning: Some rhats are > 1.01 indicating the prior was not explored well.
+## The highest rhat is 1.02 for sigma
+## Consider adding warmup iterations (via 'warmup' argument).

Now we’ll build a backend matching the generator (and reuse the compiled model from the generator)

-
+
 backend <- SBC_backend_brms_from_generator(generator, chains = 1, thin = 1,
-                            init = 0.1)
+                            warmup = 500, iter = 1500,               
+                            inits = 0.1)
 
 # More verbose alternative that results in exactly the same backend:
-# backend <- SBC_backend_brms(y ~ x, template_dataset = template_data, prior = priors, warmup = 500, iter = 1000, chains = 1, thin = 1
+# backend <- SBC_backend_brms(y ~ x, template_data = template_data, prior = priors, warmup = 500, iter = 1000, chains = 1, thin = 1
 #                            init = 0.1)

Compute the actual results

-
-results <- compute_results(datasets, backend, thin_ranks = 10, 
+
+results <- compute_SBC(datasets, backend,
                     cache_mode = "results", 
-                    cache_location = file.path(cache_dir, "first"))
-
## Results loaded from cache file 'first'
-
##  - 10 (10%) fits had at least one Rhat > 1.01. Largest Rhat was 1.018.
-
##  - 62 (62%) fits had some steps rejected. Maximum number of rejections was 5.
-
## Not all diagnostics are OK.
-## You can learn more by inspecting $default_diagnostics, $backend_diagnostics 
-## and/or investigating $outputs/$messages/$warnings for detailed output from the backend.
+ cache_location = file.path(cache_dir, "first"))
+
## Results loaded from cache file 'first'
+
##  - 14 (14%) fits had at least one Rhat > 1.01. Largest Rhat was 1.02.
+
##  - 50 (50%) fits had some steps rejected. Maximum number of rejections was 1.
+
## Not all diagnostics are OK.
+## You can learn more by inspecting $default_diagnostics, $backend_diagnostics 
+## and/or investigating $outputs/$messages/$warnings for detailed output from the backend.

There are some problems that we currently choose to ignore (the highest Rhat is barely above the 1.01 threshold, so it is probably just noise in the Rhat computation).
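If we wanted to look closer before dismissing this, the per-fit diagnostics are available directly on the results object (a minimal sketch; the exact columns of the diagnostics data frame may differ between SBC versions):

# Per-fit convergence diagnostics collected by compute_SBC,
# as referenced in the messages printed above.
head(results$default_diagnostics)
# A compact textual overview of all checks:
summary(results)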

So we can inspect the rank plots. There are no big problems at this resolution.

-
+
 plot_rank_hist(results)

-
-plot_ecdf_diff(results)
+
+plot_ecdf_diff(results)

-
-

-Using custom generator code

+
+

Using custom generator code +

Let’s take a slightly more complex model - one with a single varying intercept.

This time we will not use the brms model to also simulate from the prior, but instead simulate using an R function. This way, we get to learn whether brms does what we think it does!

-

Custom generator code also allows us to have different covariate values for each dataset, potentially improving sensitivity if we want to check the model for a range of potential covariate values. If on the other hand we are interested in a specific dataset, it might make more sense to use the predictors as seen in the dataset in all simulations to focus our efforts on the dataset at hand.

+

Custom generator code also allows us to have different covariate values for each simulation, potentially improving sensitivity if we want to check the model for a range of potential covariate values. If, on the other hand, we are interested in a specific dataset, it might make more sense to use the predictors as seen in the dataset in all simulations, to focus our efforts on the dataset at hand.

Let’s take a Gaussian model with a single varying intercept.

The data can be generated using the following code - note that we need to be careful to match the parameter names as brms uses them. You can call parnames on a fit to see them.
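A small, purely illustrative sketch of checking those names before writing the generator (names_template is a hypothetical stand-in data frame with the same columns as our simulated data):

# One-off fit whose only purpose is to list the parameter names brms uses;
# the names in the generator's `variables` list need to match these exactly.
names_template <- data.frame(y = rnorm(12), x = rnorm(12), group = rep(1:3, 4))
fit_for_names <- brm(y ~ x + (1 | group), data = names_template,
                     chains = 1, iter = 500, refresh = 0)
parnames(fit_for_names)  # e.g. "b_Intercept", "b_x", "sd_group__Intercept", ...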

-
-one_dataset_generator <- function(N, K) {
+
+one_sim_generator <- function(N, K) {
   # N - number of datapoints, K number of groups for the varying intercept
-  stopifnot(3 * K <= N)
-  x <- rnorm(N) + 5
+  stopifnot(3 * K <= N)
+  x <- rnorm(N) + 5
   
-  group <- sample(1:K, size = N, replace = TRUE)
+  group <- sample(1:K, size = N, replace = TRUE)
   # Ensure all groups are actually present at least twice
-  group[1:(3*K)] <- rep(1:K, each = 3)
+  group[1:(3*K)] <- rep(1:K, each = 3)
 
-  b_Intercept <- rnorm(1, 5, 1)   
-  b_x <- rnorm(1, 0, 1)
+  b_Intercept <- rnorm(1, 5, 1)   
+  b_x <- rnorm(1, 0, 1)
   
-  sd_group__Intercept <- abs(rnorm(1, 0, 0.75))
-  r_group <- matrix(rnorm(K, 0, sd_group__Intercept), 
+  sd_group__Intercept <- abs(rnorm(1, 0, 0.75))
+  r_group <- matrix(rnorm(K, 0, sd_group__Intercept), 
                  nrow = K, ncol = 1,
-                 dimnames = list(1:K, "Intercept"))
+                 dimnames = list(1:K, "Intercept"))
   
-  sigma <- abs(rnorm(1, 0, 3))
+  sigma <- abs(rnorm(1, 0, 3))
   
   predictor <- b_Intercept + x * b_x + r_group[group]
-  y <- rnorm(N, predictor, sigma)
+  y <- rnorm(N, predictor, sigma)
   
-  list(
-    parameters = list(
+  list(
+    variables = list(
       b_Intercept = b_Intercept,
       b_x = b_x,
       sd_group__Intercept = sd_group__Intercept,
       r_group = r_group,
       sigma = sigma
     ),
-    generated = data.frame(y = y, x = x, group = group)
+    generated = data.frame(y = y, x = x, group = group)
   )
 }
 
-n_dataset_generator <- SBC_generator_function(one_dataset_generator, N = 18, K = 5)
+n_sims_generator <- SBC_generator_function(one_sim_generator, N = 18, K = 5)

For increased sensitivity, we also add the log likelihood of the data given the parameters as a generated quantity that we’ll also monitor (see the limits_of_SBC vignette for a discussion of why this is useful).

-
-log_lik_gq_func <- generated_quantities(
-  log_lik = sum(dnorm(y, b_Intercept + x * b_x + r_group[group], sigma, log = TRUE)))
-set.seed(12239755)
-datasets_func <- generate_datasets(n_dataset_generator, 100)
-

This is then our brms backend - note that brms requires us to provide a sample dataset that it will use to build the model (e.g. to see how many levels of various varying intercepts to include):

+log_lik_gq_func <- generated_quantities( + log_lik = sum(dnorm(y, b_Intercept + x * b_x + r_group[group], sigma, log = TRUE)))
-priors_func <- prior(normal(0,1), class = "b") +
-  prior(normal(5,1), class = "Intercept") +
-  prior(normal(0,5), class = "sigma") +
-  prior(normal(0,0.75), class = "sd")
+set.seed(12239755)
+datasets_func <- generate_datasets(n_sims_generator, 100)
+

This is then our brms backend - note that brms requires us to provide a dataset that it will use to build the model (e.g. to see how many levels of various varying intercepts to include):

+
+priors_func <- prior(normal(0,1), class = "b") +
+  prior(normal(5,1), class = "Intercept") +
+  prior(normal(0,5), class = "sigma") +
+  prior(normal(0,0.75), class = "sd")
 
 
 backend_func <- SBC_backend_brms(y ~ x + (1 | group),  
                             prior = priors_func, chains = 1,
-                            template_dataset = datasets_func$generated[[1]])
+ template_data = datasets_func$generated[[1]])

So we can happily compute:

-
-results_func <- compute_results(datasets_func, backend_func, thin_ranks = 10, 
+
+results_func <- compute_SBC(datasets_func, backend_func, 
                                 gen_quants = log_lik_gq_func, 
                     cache_mode = "results", 
-                    cache_location = file.path(cache_dir, "func"))
-
## Results loaded from cache file 'func'
-
##  - 34 (34%) fits had at least one Rhat > 1.01. Largest Rhat was 1.085.
-
##  - 6 (6%) fits had tail ESS undefined or less than half of the maximum rank, potentially skewing 
-## the rank statistics. The lowest tail ESS was NA.
-##  If the fits look good otherwise, increasing `thin_ranks` (via recompute_statistics) 
-## or number of posterior samples (by refitting) might help.
-
##  - 33 (33%) fits had divergent transitions. Maximum number of divergences was 81.
-
##  - 2 (2%) fits had iterations that saturated max treedepth. Maximum number of max treedepth was 553.
-
##  - 77 (77%) fits had some steps rejected. Maximum number of rejections was 6.
-
## Not all diagnostics are OK.
-## You can learn more by inspecting $default_diagnostics, $backend_diagnostics 
-## and/or investigating $outputs/$messages/$warnings for detailed output from the backend.
+ cache_location = file.path(cache_dir, "func"))
+
## Results loaded from cache file 'func'
+
##  - 35 (35%) fits had at least one Rhat > 1.01. Largest Rhat was 1.113.
+
##  - 6 (6%) fits had tail ESS undefined or less than half of the maximum rank, potentially skewing 
+## the rank statistics. The lowest tail ESS was 25.
+##  If the fits look good otherwise, increasing `thin_ranks` (via recompute_SBC_statistics) 
+## or number of posterior draws (by refitting) might help.
+
##  - 44 (44%) fits had divergent transitions. Maximum number of divergences was 52.
+
##  - 2 (2%) fits had iterations that saturated max treedepth. Maximum number of max treedepth was 465.
+
##  - 78 (78%) fits had some steps rejected. Maximum number of rejections was 7.
+
## Not all diagnostics are OK.
+## You can learn more by inspecting $default_diagnostics, $backend_diagnostics 
+## and/or investigating $outputs/$messages/$warnings for detailed output from the backend.

So that’s not looking good! Divergent transitions, Rhat problems… And the rank plots also show problems:

-
+
 plot_rank_hist(results_func)

-
-plot_ecdf_diff(results_func)
+
+plot_ecdf_diff(results_func)

-

It looks like there is a problem affecting at least the b_Intercept and sigma parameters. We may also notice that the log_lik (log likelihood derived from all the parameters) is copying the behaviour of the worst behaving parameter. This tends to be the case in many models, so in models with lots of parameters, it can be useful to add such a term as they make noticing problems easier.

-

What happened is that brms by default centers all the predictors, which changes the numerical values of the intercept (but not other terms). The interaction with the prior than probably also affects the other parameters.

+

It looks like there is a problem affecting at least the b_Intercept and sigma variables. We may also notice that log_lik (the log likelihood derived from all the variables) copies the behaviour of the worst-behaving variable. This tends to be the case in many models, so in models with lots of variables it can be useful to add such a term, as it makes noticing problems easier.

+

What happened is that brms by default centers all the predictors, which changes the numerical values of the intercept (but not of the other terms). The interaction with the prior then probably also affects the other variables.
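One way to see the centering directly is to inspect the Stan code that brms generates (a small sketch using the standard make_stancode helper and the objects defined above):

# The generated Stan code uses a temporary intercept for the centered
# predictors and only recovers b_Intercept afterwards.
cat(make_stancode(y ~ x + (1 | group),
                  data = datasets_func$generated[[1]],
                  prior = priors_func))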

Maybe we don’t want brms to do this - using the 0 + Intercept syntax avoids the centering, so we build a new backend that should match our simulator better.

-
+
 # Using 0 + Intercept also changes how we need to specify priors
-priors_func2 <- prior(normal(0,1), class = "b") +
-  prior(normal(5,1), class = "b", coef = "Intercept") +
-  prior(normal(0,5), class = "sigma") +
-  prior(normal(0,0.75), class = "sd")
+priors_func2 <- prior(normal(0,1), class = "b") +
+  prior(normal(5,1), class = "b", coef = "Intercept") +
+  prior(normal(0,5), class = "sigma") +
+  prior(normal(0,0.75), class = "sd")
 
 
 backend_func2 <- SBC_backend_brms(y ~ 0 + Intercept + x + (1 | group),  
                             prior = priors_func2, warmup = 1000, iter = 2000, chains = 1,
-                            template_dataset = datasets_func$generated[[1]])
-

Let’s fit the same datasets with the new backend.

-
-results_func2 <- compute_results(datasets_func, backend_func2, thin_ranks = 10, 
+                            template_data = datasets_func$generated[[1]])
+

Let’s fit the same simulations with the new backend.

+
+results_func2 <- compute_SBC(datasets_func, backend_func2, 
                                  gen_quants = log_lik_gq_func, 
                     cache_mode = "results", 
-                    cache_location = file.path(cache_dir, "func2"))
-
## Results loaded from cache file 'func2'
-
##  - 20 (20%) fits had at least one Rhat > 1.01. Largest Rhat was 1.074.
-
##  - 1 (1%) fits had tail ESS undefined or less than half of the maximum rank, potentially skewing 
-## the rank statistics. The lowest tail ESS was 46.
-##  If the fits look good otherwise, increasing `thin_ranks` (via recompute_statistics) 
-## or number of posterior samples (by refitting) might help.
-
##  - 4 (4%) fits had divergent transitions. Maximum number of divergences was 1.
-
##  - 1 (1%) fits had iterations that saturated max treedepth. Maximum number of max treedepth was 530.
-
##  - 80 (80%) fits had some steps rejected. Maximum number of rejections was 8.
-
## Not all diagnostics are OK.
-## You can learn more by inspecting $default_diagnostics, $backend_diagnostics 
-## and/or investigating $outputs/$messages/$warnings for detailed output from the backend.
+ cache_location = file.path(cache_dir, "func2"))
+
## Results loaded from cache file 'func2'
+
##  - 22 (22%) fits had at least one Rhat > 1.01. Largest Rhat was 1.036.
+
##  - 7 (7%) fits had divergent transitions. Maximum number of divergences was 7.
+
##  - 1 (1%) fits had iterations that saturated max treedepth. Maximum number of max treedepth was 522.
+
##  - 80 (80%) fits had some steps rejected. Maximum number of rejections was 9.
+
## Not all diagnostics are OK.
+## You can learn more by inspecting $default_diagnostics, $backend_diagnostics 
+## and/or investigating $outputs/$messages/$warnings for detailed output from the backend.

We see that this still results in some problematic fits, but the proportion got lower. At this point I am honestly unsure what the issue is, but the rank plots look mostly OK:

 plot_rank_hist(results_func2)

-plot_ecdf_diff(results_func2)
+plot_ecdf_diff(results_func2)

I promise to update this vignette once I figure out the source of the problems.

@@ -359,11 +374,13 @@

-

Site built with pkgdown 1.6.1.

+

+

Site built with pkgdown 2.0.1.

@@ -372,5 +389,7 @@

+ + diff --git a/docs/articles/brms_files/figure-html/results_func2_plots-1.png b/docs/articles/brms_files/figure-html/results_func2_plots-1.png index e9e1b23..32b293e 100644 Binary files a/docs/articles/brms_files/figure-html/results_func2_plots-1.png and b/docs/articles/brms_files/figure-html/results_func2_plots-1.png differ diff --git a/docs/articles/brms_files/figure-html/results_func2_plots-2.png b/docs/articles/brms_files/figure-html/results_func2_plots-2.png index a152b14..ab8d88c 100644 Binary files a/docs/articles/brms_files/figure-html/results_func2_plots-2.png and b/docs/articles/brms_files/figure-html/results_func2_plots-2.png differ diff --git a/docs/articles/brms_files/figure-html/results_func_plots-1.png b/docs/articles/brms_files/figure-html/results_func_plots-1.png index 5e9d07f..3370178 100644 Binary files a/docs/articles/brms_files/figure-html/results_func_plots-1.png and b/docs/articles/brms_files/figure-html/results_func_plots-1.png differ diff --git a/docs/articles/brms_files/figure-html/results_func_plots-2.png b/docs/articles/brms_files/figure-html/results_func_plots-2.png index d967bc7..7028357 100644 Binary files a/docs/articles/brms_files/figure-html/results_func_plots-2.png and b/docs/articles/brms_files/figure-html/results_func_plots-2.png differ diff --git a/docs/articles/brms_files/figure-html/results_plots-1.png b/docs/articles/brms_files/figure-html/results_plots-1.png index 98f1a7b..eae24b6 100644 Binary files a/docs/articles/brms_files/figure-html/results_plots-1.png and b/docs/articles/brms_files/figure-html/results_plots-1.png differ diff --git a/docs/articles/brms_files/figure-html/results_plots-2.png b/docs/articles/brms_files/figure-html/results_plots-2.png index 0f74213..e46fed4 100644 Binary files a/docs/articles/brms_files/figure-html/results_plots-2.png and b/docs/articles/brms_files/figure-html/results_plots-2.png differ diff --git a/docs/articles/brms_files/header-attrs-2.10/header-attrs.js b/docs/articles/brms_files/header-attrs-2.10/header-attrs.js deleted file mode 100644 index dd57d92..0000000 --- a/docs/articles/brms_files/header-attrs-2.10/header-attrs.js +++ /dev/null @@ -1,12 +0,0 @@ -// Pandoc 2.9 adds attributes on both header and div. We remove the former (to -// be compatible with the behavior of Pandoc < 2.8). -document.addEventListener('DOMContentLoaded', function(e) { - var hs = document.querySelectorAll("div.section[class*='level'] > :first-child"); - var i, h, a; - for (i = 0; i < hs.length; i++) { - h = hs[i]; - if (!/^h[1-6]$/i.test(h.tagName)) continue; // it should be a header h1-h6 - a = h.attributes; - while (a.length > 0) h.removeAttribute(a[0].name); - } -}); diff --git a/docs/articles/brms_files/header-attrs-2.11/header-attrs.js b/docs/articles/brms_files/header-attrs-2.11/header-attrs.js deleted file mode 100644 index dd57d92..0000000 --- a/docs/articles/brms_files/header-attrs-2.11/header-attrs.js +++ /dev/null @@ -1,12 +0,0 @@ -// Pandoc 2.9 adds attributes on both header and div. We remove the former (to -// be compatible with the behavior of Pandoc < 2.8). 
-document.addEventListener('DOMContentLoaded', function(e) { - var hs = document.querySelectorAll("div.section[class*='level'] > :first-child"); - var i, h, a; - for (i = 0; i < hs.length; i++) { - h = hs[i]; - if (!/^h[1-6]$/i.test(h.tagName)) continue; // it should be a header h1-h6 - a = h.attributes; - while (a.length > 0) h.removeAttribute(a[0].name); - } -}); diff --git a/docs/articles/computational_algorithm1.html b/docs/articles/computational_algorithm1.html new file mode 100644 index 0000000..d699af1 --- /dev/null +++ b/docs/articles/computational_algorithm1.html @@ -0,0 +1,871 @@ + + + + + + + +SBC for ADVI and optimizing in Stan (+HMMs) • SBC + + + + + + + + + + + + +
+
+ + + + +
+
+ + + + +
+

Summary +

+

Computational algorithms such as variational inference (VI) can fail due to the inability of the approximation family to capture the true posterior, the under- or over-penalizing tendencies of the convergence metric, and slow convergence of the optimization process. We’ll discuss 3 examples:

+
    +
  • In Example I a simple Poisson model is shown that is well handled by default ADVI if the size of the data is small, but becomes miscalibrated when a larger number of observations is available. It also turns out that for such a simple model, using optimizing leads to very good results.

  • +
  • In Example II we discuss a Hidden Markov Model where the approximation by ADVI is imperfect but not very wrong. We also show how the (mis)calibration responds to changing parameters of the ADVI implementation and that optimizing performs worse than ADVI.

  • +
  • In Example III we show that a small modification to the model from Example II makes the ADVI approximation perform much worse.

  • +
+

When the fit between the posterior and the approximation family, the convergence metric, and the optimization process are checked so that efficiency is gained without sacrificing too much accuracy, VI can be applied. On top of its role as “the test” computational algorithms should pass, SBC provides informative inferential results which directly affect workflow decisions.

+
+
+

Introduction +

+

HMC can be slow and, depending on the joint posterior (as a combination of data, prior, and likelihood) and the user’s goal, deterministic approximation algorithms can be an aid. To be specific, if the joint posterior is well-formed enough for a reliable approximation (roughly symmetric, in the case of ADVI with its normal approximation family) or the user only needs a point estimate (i.e. specification up to the distribution level is not needed), users can consider deterministic alternatives for their inference tool, such as the ADVI supported by Stan. Note that Pathfinder (Zhang, 2021), which blends a deterministic algorithm’s efficiency with a stochastic algorithm’s accuracy in a timely manner, is under development. SBC provides one standard to test whether ADVI works well for your model, without ever needing to run full HMC for your model.

+

Let’s start by setting up our environment.

+
+library(SBC)
+library(ggplot2)
+library(cmdstanr)
+
+library(rstan)
+rstan_options(auto_write = TRUE)
+
+
+options(mc.cores = parallel::detectCores())
+
+# Parallel processing
+
+library(future)
+plan(multisession)
+
+# The fits are very fast,
+# so we force a minimum chunk size to reduce the overhead of
+# paralellization and decrease computation time.
+options(SBC.min_chunk_size = 5)
+
+
+# Setup caching of results
+cache_dir <- "./_approximate_computation_SBC_cache"
+if(!dir.exists(cache_dir)) {
+  dir.create(cache_dir)
+}
+
+
+

Example I - Poisson +

+

We’ll start by the extremely simple Poisson model already introduced in the basic usage vignette:

+
+cat(readLines("stan/poisson.stan"), sep = "\n")
+
data{
+  int N;
+  int y[N];
+}
+parameters{
+  real<lower = 0> lambda;
+}
+model{
+  lambda ~ gamma(15, 5);
+  y ~ poisson(lambda);
+}
+

And here’s R code that generates data matching that model:

+
+poisson_generator_single <- function(N){  
+  # N is the number of data points we are generating
+  lambda <- rgamma(n = 1, shape = 15, rate = 5)
+  y <- rpois(n = N, lambda = lambda)
+  list(
+    variables = list(
+      lambda = lambda
+    ),
+    generated = list(
+      N = N,
+      y = y
+    )
+  )
+}
+

We’ll start with Stan’s ADVI with all default parameters, i.e. a mean-field variational approximation. We compile the model and create a variational SBC backend.

+
+model_poisson <- cmdstan_model("stan/poisson.stan")
+backend_poisson <- SBC_backend_cmdstan_variational(model_poisson, n_retries_init = 3)
+

Note that we allow the backend to retry initialization several times (n_retries_init), as the ADVI implementation in Stan can sometimes fail to start properly on the first try even for very simple models. This ability to retry is an extension in the SBC package and not implemented in Stan.

+

Throughout the vignette, we’ll also use caching for the results.

+

Since the model runs quickly and is simple, we start with 1000 simulations.

+
+set.seed(46522641)
+ds_poisson <- generate_datasets(
+  SBC_generator_function(poisson_generator_single, N = 20), 
+  n_sims = 1000)
+res_poisson <- 
+  compute_SBC(
+    ds_poisson, backend_poisson, keep_fits = FALSE,
+    cache_mode = "results", cache_location = file.path(cache_dir, "poisson"))
+
## Results loaded from cache file 'poisson'
+

Even with the quite high precision afforded by 1000 simulations, the ECDF diff plot and the ranks show no problems - the model is quite well calibrated, although the wavy shape of the ECDF suggests a minor overconfidence of the approximation:

+
+plot_ecdf_diff(res_poisson)
+

+
+plot_rank_hist(res_poisson)
+

+

To put this in different terms we can look at the observed coverage of central 50%, 80% and 95% intervals. We see that the observed coverage for 50% and 80% intervals is a bit lower than expected.

+
+empirical_coverage(res_poisson$stats,width = c(0.95, 0.8, 0.5))
+
## # A tibble: 3 x 6
+##   variable width width_represented ci_low estimate ci_high
+##   <chr>    <dbl>             <dbl>  <dbl>    <dbl>   <dbl>
+## 1 lambda    0.5               0.5   0.418    0.449   0.480
+## 2 lambda    0.8               0.8   0.749    0.776   0.801
+## 3 lambda    0.95              0.95  0.934    0.949   0.961
+
+

Is more data better? +

+

One would expect that the normal approximation implemented in ADVI becomes better with an increased size of the data; this is however not necessarily true - let’s run the same model, but increase N - the number of observed data points:

+
+set.seed(23546224)
+ds_poisson_100 <- generate_datasets(
+  SBC_generator_function(poisson_generator_single, N = 100), 
+  n_sims = 1000)
+res_poisson_100 <- 
+  compute_SBC(ds_poisson_100, backend_poisson, keep_fits = FALSE,
+                  cache_mode = "results", cache_location = file.path(cache_dir, "poisson_100"))
+
## Results loaded from cache file 'poisson_100'
+

In this case the model becomes clearly overconfident:

+
+plot_ecdf_diff(res_poisson_100)
+

+
+plot_rank_hist(res_poisson_100)
+

+

The empirical coverage of the central intervals confirms this:

+
+empirical_coverage(res_poisson_100$stats,width = c(0.95, 0.8, 0.5))
+
## # A tibble: 3 x 6
+##   variable width width_represented ci_low estimate ci_high
+##   <chr>    <dbl>             <dbl>  <dbl>    <dbl>   <dbl>
+## 1 lambda    0.5               0.5   0.411    0.442   0.473
+## 2 lambda    0.8               0.8   0.684    0.713   0.740
+## 3 lambda    0.95              0.95  0.883    0.903   0.920
+
+
+

Optimizing +

+

If the model is so simple, maybe a simple Laplace approximation around the posterior mode would suffice? We can use Stan’s optimizing mode exactly for that. Unfortunately, this is currently implemented only in rstan and not in cmdstanr (because the underlying CmdStan does not expose the Hessian of the optimizing fit).

+

So let us build an optimizing backend

+
+model_poisson_rstan <- stan_model("stan/poisson.stan")
+backend_poisson_optimizing <- SBC_backend_rstan_optimizing(model_poisson_rstan)
+

and use it to fit the same datasets - first to the one with N = 20.

+
+res_poisson_optimizing <- 
+  compute_SBC(ds_poisson, backend_poisson_optimizing, keep_fits = FALSE,
+    cache_mode = "results", cache_location = file.path(cache_dir, "poisson_opt"))
+
## Results loaded from cache file 'poisson_opt'
+

The resulting ECDF and rank plots are very good.

+
+plot_ecdf_diff(res_poisson_optimizing)
+

+
+plot_rank_hist(res_poisson_optimizing)
+

+

Similarly, we can fit the N = 100 datasets.

+
+res_poisson_optimizing_100 <- 
+  compute_SBC(ds_poisson_100, backend_poisson_optimizing, keep_fits = FALSE,
+    cache_mode = "results", cache_location = file.path(cache_dir, "poisson_opt_100"))
+
## Results loaded from cache file 'poisson_opt_100'
+

The resulting rank plot once again indicates no serious issues and we thus get better results here than with ADVI.

+
+plot_ecdf_diff(res_poisson_optimizing_100)
+

+
+plot_rank_hist(res_poisson_optimizing_100)
+

+
+
+

Summary +

+

We see that for simple models ADVI can provide a very tight approximation to exact inference, but this cannot be taken for granted. Surprisingly, having more data does not make the ADVI approximation necessarily better. Additionally, for such simple models, a simple Laplace approximation around the posterior mode works better (and likely faster) than ADVI.

+
+
+
+

Example II - Hidden Markov Model +

+

We’ll jump to a considerably more complex model (partially because we wanted to have an HMM example).

+

In this example, we have collected a set of counts of particles emitted by a specimen in a relatively large number of experimental runs. However, we noticed that there is a suspiciously large number of low counts. Inspecting the equipment, it turns out that the experiment was not set up properly and in some of the runs our detector could only register background noise. We however don’t know which runs were erroneous.

+

So we assume that some experiments contain both background noise and the signal of interest and the rest contain just the background. For simplicity, we assume a Poisson distribution for the counts.

+

Additionally, whether an individual data point contains only background or also signal is not independent across data points, and we want to model how the experimental setup switches between these two states over time. We add additional structure to the model to account for this autocorrelation.

+

One possible choice for such structure is hidden Markov models (HMMs) where we assume the probability of transitioning from one state to another is identical across all time points. The case study for HMMs has a more thorough discussion and also shows how to code those in Stan.

+

Maybe the simplest way to describe the model is to show how we simulate the data:

+
+generator_HMM <- function(N) {
+  
+  mu_background <- rlnorm(1, -2, 1)
+  mu_signal <- rlnorm(1, 2, 1)
+
+  # Draw the transition probabilities
+  t1 <- MCMCpack::rdirichlet(1, c(3, 3))
+  t2 <- MCMCpack::rdirichlet(1, c(3, 3))
+
+  states = rep(NA_integer_, N)
+  # Draw from initial state distribution
+  rho <- MCMCpack::rdirichlet(1, c(1, 10))
+
+  # Simulate the hidden states
+  states[1] = sample(1:2, size = 1, prob = rho)
+  for(n in 2:length(states)) {
+    if(states[n - 1] == 1)
+      states[n] = sample(c(1, 2), size = 1, prob = t1)
+    else if(states[n - 1] == 2)
+      states[n] = sample(c(1, 2), size = 1, prob = t2)
+  }  
+
+  # Simulate observations given the state
+  mu <- c(mu_background, mu_background + mu_signal)
+  y <- rpois(N, mu[states])
+  
+  list(
+    variables = list(
+      mu_background = mu_background,
+      mu_signal = mu_signal,
+      # rdirichlet returns matrices, convert to 1D vectors
+      t1 = as.numeric(t1),
+      t2 = as.numeric(t2),
+      rho = as.numeric(rho)
+    ),
+    generated = list(
+      N = N,
+      y = y
+    )
+  )
+}
+

And here is the Stan code that models this process (it is based on the example from the HMM case study but simplified and modified).

+
+cat(readLines("stan/hmm_poisson.stan"), sep = "\n")
+
data {
+  int N; // Number of observations
+  array[N] int y;
+}
+parameters {
+  // Parameters of measurement model
+  real<lower=0> mu_background;
+  real<lower=0> mu_signal;
+
+  // Initial state
+  simplex[2] rho;
+
+  // Rows of the transition matrix
+  simplex[2] t1;
+  simplex[2] t2;
+}
+
+model {
+
+  matrix[2, 2] Gamma;
+  matrix[2, N] log_omega;
+
+  // Build the transition matrix
+  Gamma[1, : ] = t1';
+  Gamma[2, : ] = t2';
+
+  // Compute the log likelihoods in each possible state
+  for (n in 1 : N) {
+    // The observation model could change with n, or vary in a number of
+    //  different ways (which is why log_omega is passed in as an argument)
+    log_omega[1, n] = poisson_lpmf(y[n] | mu_background);
+    log_omega[2, n] = poisson_lpmf(y[n] | mu_background + mu_signal);
+  }
+
+  mu_background ~ lognormal(-2, 1);
+  mu_signal ~ lognormal(2, 1);
+
+  // Initial state - we're quite sure we started with the source working
+  rho ~ dirichlet([1, 10]);
+
+  t1 ~ dirichlet([3, 3]);
+  t2 ~ dirichlet([3, 3]);
+
+  target += hmm_marginal(log_omega, Gamma, rho);
+}
+
+

Default ADVI +

+

We start with the default (meanfield) variational backend via Stan:

+
+if(package_version(cmdstanr::cmdstan_version()) < package_version("2.26.0") ) {
+  stop("The models int this section require CmdStan 2.26 or later.")
+}
+model_HMM <- cmdstan_model("stan/hmm_poisson.stan")
+backend_HMM <- SBC_backend_cmdstan_variational(model_HMM, n_retries_init = 3)
+

Since we are feeling confident that our model is implemented correctly (and the model runs quickly), we start with 100 simulations and assume 100 observations for each. If you are developing a new model, it might be useful to start with fewer simulations, as discussed in the small model workflow vignette.

+

And we compute results

+
+set.seed(642354822)
+ds_hmm <- generate_datasets(SBC_generator_function(generator_HMM, N = 100), n_sims = 100)
+res_hmm <- compute_SBC(ds_hmm, backend_HMM,
+                           cache_mode = "results", cache_location = file.path(cache_dir, "hmm"))
+
## Results loaded from cache file 'hmm'
+

There are no huge problems, but the mu_signal variable does not seem to be well calibrated:

+
+plot_ecdf_diff(res_hmm)
+

+
+plot_rank_hist(res_hmm)
+

+

We may also look at the observed coverage of central intervals - we see that for mu_signal the approximation tends to be overconfident for the wider intervals.

+
+plot_coverage(res_hmm)
+

+

To make sure this is not a fluke we add 400 more simulations.

+
+set.seed(2254355)
+ds_hmm_2 <- generate_datasets(SBC_generator_function(generator_HMM, N = 100), n_sims = 400)
+
+res_hmm_2 <- bind_results(
+  res_hmm,
+  compute_SBC(ds_hmm_2,backend_HMM,
+                  cache_mode = "results",
+                  cache_location = file.path(cache_dir, "hmm2"))
+)
+
## Results loaded from cache file 'hmm2'
+

This confirms the problems with mu_signal. Additionally, we see that mu_background and the rho variables also show some irregularities.

+
+plot_ecdf_diff(res_hmm_2)
+

+
+plot_rank_hist(res_hmm_2)
+

+

Looking at the observed coverage, both mu_background and mu_signal are now clearly somewhat overconfident for the wider intervals.

+
+plot_coverage(res_hmm_2)
+

+

This is what we get when we focus on the 90% posterior credible interval:

+
+coverage_hmm <- empirical_coverage(res_hmm_2$stats, width = 0.9)[, c("variable", "ci_low", "ci_high")]
+coverage_hmm
+
## # A tibble: 8 x 3
+##   variable      ci_low ci_high
+##   <chr>          <dbl>   <dbl>
+## 1 mu_background  0.822   0.884
+## 2 mu_signal      0.754   0.825
+## 3 rho[1]         0.864   0.918
+## 4 rho[2]         0.864   0.918
+## 5 t1[1]          0.803   0.868
+## 6 t1[2]          0.803   0.868
+## 7 t2[1]          0.838   0.897
+## 8 t2[2]          0.838   0.897
+

So the 90% central credible interval for mu_signal likely contains less than 83% of true values.

+

If only a crude result is needed, the default ADVI setup we just tested is not terrible: we don’t expect to see a strong bias and the model will be somewhat overconfident, but not catastrophically so.

+

Note that when the user is aiming for a point estimate of the mean or another central tendency, a summary of the VI posterior may provide a good point estimate even when the uncertainty is miscalibrated. VSBC, a diagnostic that concentrates on bias in marginal quantities, was developed to test this (Yao et al., 2018), but is currently not implemented in our package (see https://github.com/hyunjimoon/SBC/issues/60 for progress). Other diagnostics, such as the PSIS-based one, are tied to a specific dataset and test quantity and are thus less flexible for targeted testing.

+
+
+

Full-rank +

+

We may try if the situation improves with full-rank ADVI - let’s run it for the same datasets.

+
+ds_hmm_all <- bind_datasets(ds_hmm, ds_hmm_2)
+res_hmm_fullrank <- compute_SBC(
+  ds_hmm_all, 
+  SBC_backend_cmdstan_variational(model_HMM, algorithm = "fullrank", n_retries_init = 3),
+  cache_mode = "results", cache_location = file.path(cache_dir, "hmm_fullrank"))
+
## Results loaded from cache file 'hmm_fullrank'
+

We still have problems, but different ones (and arguably somewhat less severe):

+
+plot_ecdf_diff(res_hmm_fullrank)
+

+
+plot_rank_hist(res_hmm_fullrank)
+

+

Interestingly, the rank plot for mu_signal shows a “frowning” shape, meaning the full-rank approximation is slightly underconfident here.

+

This is nicely demonstrated by looking at the central interval coverage - now the coverage of mu_signal is larger than it should be, so the model is underconfident (i.e. more conservative), while the coverages for other variables track the nominal values quite closely.

+
+plot_coverage(res_hmm_fullrank)
+

+

Or alternatively looking at the numerical values for coverage of the central 90% interval

+
+coverage_hmm_fullrank <- 
+  empirical_coverage(res_hmm_fullrank$stats, width = 0.9)[, c("variable", "ci_low", "ci_high")]
+coverage_hmm_fullrank
+
## # A tibble: 8 x 3
+##   variable      ci_low ci_high
+##   <chr>          <dbl>   <dbl>
+## 1 mu_background  0.851   0.907
+## 2 mu_signal      0.891   0.939
+## 3 rho[1]         0.875   0.927
+## 4 rho[2]         0.875   0.927
+## 5 t1[1]          0.868   0.922
+## 6 t1[2]          0.868   0.922
+## 7 t2[1]          0.879   0.930
+## 8 t2[2]          0.879   0.930
+

This pattern where the default meanfield approximation is overconfident and the fullrank approximation is underconfident is in fact quite frequently seen, which motivated some experiments with a low rank approximation that would fall in between those, but as of early 2022 this is not ready for use in Stan.

+
+
+

Meanfield + lower tolerance +

+

In some cases, it might also help to reduce the tolerance (tol_rel_obj) of the algorithm. This is the convergence tolerance on the relative changes of the evidence lower bound (ELBO), so lowering it enforces a tighter optimization convergence. Here we’ll use the default mean-field algorithm, but decrease tol_rel_obj (the default value is 0.01). So let’s try that.

+
+res_hmm_lowtol <- compute_SBC(
+  ds_hmm_all, 
+  SBC_backend_cmdstan_variational(model_HMM, tol_rel_obj = 0.001, n_retries_init = 3),
+  cache_mode = "results", cache_location = file.path(cache_dir, "hmm_lowtol"))
+
## Results loaded from cache file 'hmm_lowtol'
+
##  - 14 (3%) of fits did not converge.
+
## Not all diagnostics are OK.
+## You can learn more by inspecting $default_diagnostics, $backend_diagnostics 
+## and/or investigating $outputs/$messages/$warnings for detailed output from the backend.
+

Reducing the tolerance leads to a small proportion of non-converging fits. In theory, increasing grad_samples should improve convergence, but in our experience the convergence of the current ADVI implementation (as of 2021) does not easily change with this adjustment - a sketch of how one would try it is shown below. Also, since the non-converged cases are relatively rare, we’ll just remove the non-converging fits from the SBC results (this is OK as long as we would also discard non-converging fits for real data, see the rejection sampling vignette).
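A minimal sketch of trying a larger grad_samples value (this assumes SBC_backend_cmdstan_variational forwards the argument to CmdStan's variational method in the same way it forwards tol_rel_obj):

# More gradient samples per ELBO gradient estimate (CmdStan's default is 1);
# in our experience this rarely changes the convergence behaviour.
backend_HMM_gradsamples <- SBC_backend_cmdstan_variational(
  model_HMM, tol_rel_obj = 0.001, grad_samples = 10, n_retries_init = 3)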

+
+res_hmm_lowtol_conv <-
+    res_hmm_lowtol[res_hmm_lowtol$backend_diagnostics$elbo_converged] 
+
+plot_ecdf_diff(res_hmm_lowtol_conv)
+

+
+plot_rank_hist(res_hmm_lowtol_conv)
+

+

The problems seem to have become even less pronounced. We may once again inspect the observed coverage of central intervals

+
+plot_coverage(res_hmm_lowtol_conv)
+

+

and the numerical values for the coverage of the central 90% interval.

+
+empirical_coverage(res_hmm_lowtol$stats, width = 0.9)[, c("variable", "ci_low", "ci_high")]
+
## # A tibble: 8 x 3
+##   variable      ci_low ci_high
+##   <chr>          <dbl>   <dbl>
+## 1 mu_background  0.827   0.888
+## 2 mu_signal      0.831   0.891
+## 3 rho[1]         0.882   0.932
+## 4 rho[2]         0.882   0.932
+## 5 t1[1]          0.814   0.877
+## 6 t1[2]          0.814   0.877
+## 7 t2[1]          0.844   0.902
+## 8 t2[2]          0.844   0.902
+

This variant has somewhat lower overall mismatch, but tends to be overconfident, which might in some cases be less desirable than the more conservative fullrank.

+
+
+

Optimizing +

+

Would optimizing provide sensible results in this case? We build an optimizing backend and run it.

+
+SBC:::require_package_version("rstan", "2.26", "The models in the following sections need more recent rstan than what is available on CRAN - use https://mc-stan.org/r-packages/ to get it")
+
+model_HMM_rstan <- stan_model("stan/hmm_poisson.stan")
+
+res_hmm_optimizing <- compute_SBC(
+  ds_hmm_all, 
+  SBC_backend_rstan_optimizing(model_HMM_rstan, n_retries_hessian = 3),
+  cache_mode = "results", cache_location = file.path(cache_dir, "hmm_optimizing"))
+
## Results loaded from cache file 'hmm_optimizing'
+
##  - 1 (0%) of fits required multiple attempts to produce usable Hessian.
+
## Not all diagnostics are OK.
+## You can learn more by inspecting $default_diagnostics, $backend_diagnostics 
+## and/or investigating $outputs/$messages/$warnings for detailed output from the backend.
+

We see that while for some variables (mu_signal, the transition probabilities t[]) the Laplace approximation is reasonably well calibrated, it is very badly calibrated with respect to the initial states rho and also for mu_background, where there is substantial bias. So if we were only interested in a subset of the variables, the optimizing fit could still be an OK choice.

+
+plot_ecdf_diff(res_hmm_optimizing)
+

+
+plot_rank_hist(res_hmm_optimizing)
+

+
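If we were mostly interested in that better-behaved subset of variables, we could also inspect their empirical coverage numerically, mirroring the empirical_coverage call used earlier (a sketch only, not evaluated in the rendered vignette):

coverage_opt <- empirical_coverage(res_hmm_optimizing$stats, width = 0.9)
coverage_opt[coverage_opt$variable %in% c("mu_signal", "t1[1]", "t1[2]", "t2[1]", "t2[2]"),
             c("variable", "ci_low", "ci_high")]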
+
+

Summary +

+

To summarise, the HMM model turns out to pose minor problems for ADVI that can be partially resolved by tweaking the parameters of the ADVI algorithm. Just using optimizing results in much worse calibration than ADVI.

+

Another relevant question is how much speed we gained. For comparison, we run full MCMC with Stan for 50 of the simulated datasets.

+
+res_hmm_sample <- compute_SBC(
+  ds_hmm[1:50], 
+  SBC_backend_cmdstan_sample(model_HMM),
+  keep_fits = FALSE,
+  cache_mode = "results", cache_location = file.path(cache_dir, "hmm_sample"))
+
## Results loaded from cache file 'hmm_sample'
+
##  - 1 (2%) fits had at least one Rhat > 1.01. Largest Rhat was 1.019.
+
##  - 1 (2%) fits had tail ESS undefined or less than half of the maximum rank, potentially skewing 
+## the rank statistics. The lowest tail ESS was 154.
+##  If the fits look good otherwise, increasing `thin_ranks` (via recompute_SBC_statistics) 
+## or number of posterior draws (by refitting) might help.
+
##  - 1 (2%) fits had divergent transitions. Maximum number of divergences was 51.
+
## Not all diagnostics are OK.
+## You can learn more by inspecting $default_diagnostics, $backend_diagnostics 
+## and/or investigating $outputs/$messages/$warnings for detailed output from the backend.
+

We get a small number of problematic fits, which we will ignore for now. We check that there are no obvious calibration problems:

+
+plot_ecdf_diff(res_hmm_sample)
+

+
+plot_rank_hist(res_hmm_sample)
+

+

For the machine this vignette was built on, here are the distributions of fitting times (for ADVI and optimizing) and of the time of the longest chain (for HMC):

+
+hmm_time <- 
+  rbind(
+      data.frame(alg = "Optimizing", 
+                     time = res_hmm_optimizing$backend_diagnostics$time),
+      data.frame(alg = "Meanfield", 
+                     time = res_hmm$backend_diagnostics$time),
+        data.frame(alg = "Fullrank", 
+                   time = res_hmm_fullrank$backend_diagnostics$time),
+        data.frame(alg = "Meanfield + low tol.", 
+                   time = res_hmm_lowtol$backend_diagnostics$time),
+        data.frame(alg = "Sampling (longest chain)", 
+                   time = res_hmm_sample$backend_diagnostics$max_chain_time))
+
+max_time_optimizing <- round(max(res_hmm_optimizing$backend_diagnostics$time), 2)
+
+
+hmm_time$alg <- factor(hmm_time$alg, 
+                       levels = c("Optimizing",
+                                  "Meanfield",
+                                  "Fullrank",
+                                  "Meanfield + low tol.",
+                                  "Sampling (longest chain)"))
+ 
+ggplot(hmm_time, aes(x = time)) + 
+  geom_histogram(aes(y = ..density..), bins = 20) + 
+  facet_wrap(~alg, ncol = 1) +
+  scale_x_continuous("Time [seconds]")
+

+

Depressingly, while using a lower tolerance gets us uncertainty quantification almost as good as sampling, it also erases a big part of the performance advantage variational inference had over sampling for this model. However, both the fullrank and meanfield approximations provide not-terrible estimates and are noticeably faster than sampling. Optimizing is by far the fastest, with the longest observed time being just 1.58 seconds.

+
+
+
+

Example III - Hidden Markov Model, ordered variant +

+

Unfortunately, ADVI as implemented in Stan can be quite fragile. Let us consider a very small change to the HMM model from the previous section: let us model the means of the counts for the two states directly (the previous version modelled the background state and the difference between the two states) and move to the log scale. So instead of mu_background and mu_signal we have an ordered vector log_mu:

+
+cat(readLines("stan/hmm_poisson_ordered.stan"), sep = "\n")
+
data {
+  int N; // Number of observations
+  array[N] int y;
+}
+parameters {
+  // Parameters of measurement model
+  ordered[2] log_mu;
+
+  // Initial state
+  simplex[2] rho;
+
+  // Rows of the transition matrix
+  simplex[2] t1;
+  simplex[2] t2;
+}
+
+model {
+
+  matrix[2, 2] Gamma;
+  matrix[2, N] log_omega;
+
+  // Build the transition matrix
+  Gamma[1, : ] = t1';
+  Gamma[2, : ] = t2';
+
+  // Compute the log likelihoods in each possible state
+  for (n in 1 : N) {
+    // The observation model could change with n, or vary in a number of
+    //  different ways (which is why log_omega is passed in as an argument)
+    log_omega[1, n] = poisson_log_lpmf(y[n] | log_mu[1]);
+    log_omega[2, n] = poisson_log_lpmf(y[n] | log_mu[2]);
+  }
+
+  log_mu[1] ~ normal(-2, 1);
+  log_mu[2] ~ normal(2, 1);
+
+  // Initial state - we're quite sure we started with the source working
+  rho ~ dirichlet([1, 10]);
+
+  t1 ~ dirichlet([3, 3]);
+  t2 ~ dirichlet([3, 3]);
+
+  target += hmm_marginal(log_omega, Gamma, rho);
+}
+
+
+generated quantities {
+  positive_ordered[2] mu = exp(log_mu);
+}
+

This model is almost identical - in theory the only difference is that it implies a slightly different prior on the active (higher mean) state. Here is how we can generate data with this mildly different prior (we need rejection sampling to fulfill the ordering constraint):

+
+generator_HMM_ordered <- function(N) {
+  
+  # Rejection sampling for ordered mu with the correct priors
+  repeat {
+    log_mu <- c(rnorm(1, -2, 1), rnorm(1, 2, 1))
+    if(log_mu[1] < log_mu[2]) {
+      break;
+    }
+  }
+  
+  mu <- exp(log_mu)
+
+  # Draw the transition probabilities
+  t1 <- MCMCpack::rdirichlet(1, c(3, 3))
+  t2 <- MCMCpack::rdirichlet(1, c(3, 3))
+
+  states = rep(NA_integer_, N)
+  # Draw from initial state distribution
+  rho <- MCMCpack::rdirichlet(1, c(1, 10))
+
+  states[1] = sample(1:2, size = 1, prob = rho)
+  for(n in 2:length(states)) {
+    if(states[n - 1] == 1)
+      states[n] = sample(c(1, 2), size = 1, prob = t1)
+    else if(states[n - 1] == 2)
+      states[n] = sample(c(1, 2), size = 1, prob = t2)
+  }  
+  
+  y <- rpois(N, mu[states])
+  
+  list(
+    variables = list(
+      log_mu = log_mu,
+      # rdirichlet returns matrices, convert to 1D vectors
+      t1 = as.numeric(t1),
+      t2 = as.numeric(t2),
+      rho = as.numeric(rho)
+    ),
+    generated = list(
+      N = N,
+      y = y
+    )
+  )
+}
+

So let us build a default variational backend and fit it to just 20 simulations.

+
+model_HMM_ordered <- cmdstan_model("stan/hmm_poisson_ordered.stan")
+backend_HMM_ordered <- SBC_backend_cmdstan_variational(model_HMM_ordered, n_retries_init = 3)
+
+set.seed(12333654)
+ds_hmm_ordered <- generate_datasets(
+  SBC_generator_function(generator_HMM_ordered, N = 100), 
+  n_sims = 20)
+
+res_hmm_ordered <- 
+  compute_SBC(ds_hmm_ordered, backend_HMM_ordered,
+                  cache_mode = "results", cache_location = file.path(cache_dir, "hmm_ordered"))
+
## Results loaded from cache file 'hmm_ordered'
+

Immediately we see that the log_mu[1] variable is heavily miscalibrated.

+
+plot_ecdf_diff(res_hmm_ordered)
+

+
+plot_rank_hist(res_hmm_ordered)
+

+

What changed? To understand that we need to remember how Stan represents constrained data types. In short, in the model in Example II, Stan will internally work with the so-called unconstrained parameters mu_background__ = log(mu_background) and mu_signal__ = log(mu_signal). In this modified model, the internal representation will be: log_mu_1__ = log_mu[1] (without any change) and log_mu_2__ = log(log_mu[2] - log_mu[1]). So the mean for the active component is actually exp(log_mu_1__ + exp(log_mu_2__)). This introduces a complex correlation structure between the unconstrained parameters that the ADVI algorithm is unable to handle well.
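To make the transform concrete, here is a small R sketch (purely illustrative, not part of the vignette code) of how a single draw of the ordered vector maps to Stan's unconstrained space and back:

# A draw of the ordered vector on the constrained scale
log_mu <- c(-2.1, 1.8)

# Stan's unconstrained representation of an ordered[2] vector
log_mu_1_unc <- log_mu[1]                   # first element is unchanged
log_mu_2_unc <- log(log_mu[2] - log_mu[1])  # log of the positive increment

# Going back, the active-state mean depends on both unconstrained parameters
mu_active <- exp(log_mu_1_unc + exp(log_mu_2_unc))
stopifnot(isTRUE(all.equal(mu_active, exp(log_mu[2]))))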

+

Even trying the fullrank variant does not help:

+
+backend_HMM_ordered_fullrank <- 
+  SBC_backend_cmdstan_variational(model_HMM_ordered,
+                                  algorithm = "fullrank", n_retries_init = 3)
+
+res_hmm_ordered_fullrank <- 
+  compute_SBC(ds_hmm_ordered, backend_HMM_ordered_fullrank,
+                  cache_mode = "results", cache_location = file.path(cache_dir, "hmm_ordered_fullrank"))
+
## Results loaded from cache file 'hmm_ordered_fullrank'
+

The results are still strongly miscalibrated.

+
+plot_ecdf_diff(res_hmm_ordered_fullrank)
+

+
+plot_rank_hist(res_hmm_ordered_fullrank)
+

+

To have a complete overview we may also try the optimizing fit:

+
+model_HMM_ordered_rstan <- stan_model("stan/hmm_poisson_ordered.stan")
+
+res_hmm_ordered_optimizing <- compute_SBC(
+  ds_hmm_ordered, 
+  SBC_backend_rstan_optimizing(model_HMM_ordered_rstan),
+  cache_mode = "results", cache_location = file.path(cache_dir, "hmm_ordered_optimizing"))
+
## Results loaded from cache file 'hmm_ordered_optimizing'
+

In this case, optimizing has better calibration for log_mu, but worse calibration for rho than ADVI.

+
+plot_ecdf_diff(res_hmm_ordered_optimizing)
+

+
+plot_rank_hist(res_hmm_ordered_optimizing)
+

+
+
+

Conclusion +

+

As this vignette has shown, for some models ADVI will provide results that are close to what we get with sampling, but it may also fail catastrophically on models that are just slightly different, and tweaking the algorithm parameters might be necessary. In some cases where ADVI works, the Laplace approximation via optimizing will also work well. ADVI (and optimizing) thus cannot be used blindly. Fortunately, SBC can be used to check for this type of problem without ever needing to run the full sampling.

+
+
+

Next step: Evolving computation and diagnostic. +

+

In computational_algorithm2, we will focus on the more hopeful aspects of approximate computation. We introduce an adversarial relation between computation and diagnostics, through which the two can evolve together. This can give insight to designers of computational algorithms who aim to pass SBC. As an illustration, we discuss when and how VI can be used, including customized SBC (e.g. VSBC) and first- or second-order corrections.

+
+
+

References +

+ +
+
+ + + +
+ + + +
+ +
+

+

Site built with pkgdown 2.0.1.

+
+ +
+
+ + + + + + + + diff --git a/docs/articles/computational_algorithm1_files/figure-html/ecdf_rank_poisson-1.png b/docs/articles/computational_algorithm1_files/figure-html/ecdf_rank_poisson-1.png new file mode 100644 index 0000000..484850a Binary files /dev/null and b/docs/articles/computational_algorithm1_files/figure-html/ecdf_rank_poisson-1.png differ diff --git a/docs/articles/computational_algorithm1_files/figure-html/ecdf_rank_poisson-2.png b/docs/articles/computational_algorithm1_files/figure-html/ecdf_rank_poisson-2.png new file mode 100644 index 0000000..83dadb6 Binary files /dev/null and b/docs/articles/computational_algorithm1_files/figure-html/ecdf_rank_poisson-2.png differ diff --git a/docs/articles/computational_algorithm1_files/figure-html/ecdf_rank_poisson_100-1.png b/docs/articles/computational_algorithm1_files/figure-html/ecdf_rank_poisson_100-1.png new file mode 100644 index 0000000..444be5e Binary files /dev/null and b/docs/articles/computational_algorithm1_files/figure-html/ecdf_rank_poisson_100-1.png differ diff --git a/docs/articles/computational_algorithm1_files/figure-html/ecdf_rank_poisson_100-2.png b/docs/articles/computational_algorithm1_files/figure-html/ecdf_rank_poisson_100-2.png new file mode 100644 index 0000000..a5bcfb2 Binary files /dev/null and b/docs/articles/computational_algorithm1_files/figure-html/ecdf_rank_poisson_100-2.png differ diff --git a/docs/articles/computational_algorithm1_files/figure-html/ecdf_rank_poisson_optimizing-1.png b/docs/articles/computational_algorithm1_files/figure-html/ecdf_rank_poisson_optimizing-1.png new file mode 100644 index 0000000..807eefc Binary files /dev/null and b/docs/articles/computational_algorithm1_files/figure-html/ecdf_rank_poisson_optimizing-1.png differ diff --git a/docs/articles/computational_algorithm1_files/figure-html/ecdf_rank_poisson_optimizing-2.png b/docs/articles/computational_algorithm1_files/figure-html/ecdf_rank_poisson_optimizing-2.png new file mode 100644 index 0000000..71aedfb Binary files /dev/null and b/docs/articles/computational_algorithm1_files/figure-html/ecdf_rank_poisson_optimizing-2.png differ diff --git a/docs/articles/computational_algorithm1_files/figure-html/ecdf_rank_poisson_optimizing_100-1.png b/docs/articles/computational_algorithm1_files/figure-html/ecdf_rank_poisson_optimizing_100-1.png new file mode 100644 index 0000000..f66ca4c Binary files /dev/null and b/docs/articles/computational_algorithm1_files/figure-html/ecdf_rank_poisson_optimizing_100-1.png differ diff --git a/docs/articles/computational_algorithm1_files/figure-html/ecdf_rank_poisson_optimizing_100-2.png b/docs/articles/computational_algorithm1_files/figure-html/ecdf_rank_poisson_optimizing_100-2.png new file mode 100644 index 0000000..5bd52ba Binary files /dev/null and b/docs/articles/computational_algorithm1_files/figure-html/ecdf_rank_poisson_optimizing_100-2.png differ diff --git a/docs/articles/computational_algorithm1_files/figure-html/hmm_2_coverage-1.png b/docs/articles/computational_algorithm1_files/figure-html/hmm_2_coverage-1.png new file mode 100644 index 0000000..8792ac2 Binary files /dev/null and b/docs/articles/computational_algorithm1_files/figure-html/hmm_2_coverage-1.png differ diff --git a/docs/articles/computational_algorithm1_files/figure-html/hmm_2_ecdf_ranks-1.png b/docs/articles/computational_algorithm1_files/figure-html/hmm_2_ecdf_ranks-1.png new file mode 100644 index 0000000..2084036 Binary files /dev/null and 
b/docs/articles/computational_algorithm1_files/figure-html/hmm_2_ecdf_ranks-1.png differ diff --git a/docs/articles/computational_algorithm1_files/figure-html/hmm_2_ecdf_ranks-2.png b/docs/articles/computational_algorithm1_files/figure-html/hmm_2_ecdf_ranks-2.png new file mode 100644 index 0000000..1429662 Binary files /dev/null and b/docs/articles/computational_algorithm1_files/figure-html/hmm_2_ecdf_ranks-2.png differ diff --git a/docs/articles/computational_algorithm1_files/figure-html/hmm_coverage-1.png b/docs/articles/computational_algorithm1_files/figure-html/hmm_coverage-1.png new file mode 100644 index 0000000..36a1652 Binary files /dev/null and b/docs/articles/computational_algorithm1_files/figure-html/hmm_coverage-1.png differ diff --git a/docs/articles/computational_algorithm1_files/figure-html/hmm_ecdf_ranks-1.png b/docs/articles/computational_algorithm1_files/figure-html/hmm_ecdf_ranks-1.png new file mode 100644 index 0000000..a46ceee Binary files /dev/null and b/docs/articles/computational_algorithm1_files/figure-html/hmm_ecdf_ranks-1.png differ diff --git a/docs/articles/computational_algorithm1_files/figure-html/hmm_ecdf_ranks-2.png b/docs/articles/computational_algorithm1_files/figure-html/hmm_ecdf_ranks-2.png new file mode 100644 index 0000000..6093784 Binary files /dev/null and b/docs/articles/computational_algorithm1_files/figure-html/hmm_ecdf_ranks-2.png differ diff --git a/docs/articles/computational_algorithm1_files/figure-html/hmm_fullrank_coverage-1.png b/docs/articles/computational_algorithm1_files/figure-html/hmm_fullrank_coverage-1.png new file mode 100644 index 0000000..7e749b8 Binary files /dev/null and b/docs/articles/computational_algorithm1_files/figure-html/hmm_fullrank_coverage-1.png differ diff --git a/docs/articles/computational_algorithm1_files/figure-html/hmm_fullrank_ecdf_ranks-1.png b/docs/articles/computational_algorithm1_files/figure-html/hmm_fullrank_ecdf_ranks-1.png new file mode 100644 index 0000000..0917c57 Binary files /dev/null and b/docs/articles/computational_algorithm1_files/figure-html/hmm_fullrank_ecdf_ranks-1.png differ diff --git a/docs/articles/computational_algorithm1_files/figure-html/hmm_fullrank_ecdf_ranks-2.png b/docs/articles/computational_algorithm1_files/figure-html/hmm_fullrank_ecdf_ranks-2.png new file mode 100644 index 0000000..64a94a6 Binary files /dev/null and b/docs/articles/computational_algorithm1_files/figure-html/hmm_fullrank_ecdf_ranks-2.png differ diff --git a/docs/articles/computational_algorithm1_files/figure-html/hmm_lowtol_coverage-1.png b/docs/articles/computational_algorithm1_files/figure-html/hmm_lowtol_coverage-1.png new file mode 100644 index 0000000..931d4da Binary files /dev/null and b/docs/articles/computational_algorithm1_files/figure-html/hmm_lowtol_coverage-1.png differ diff --git a/docs/articles/computational_algorithm1_files/figure-html/hmm_lowtol_ecdf_ranks-1.png b/docs/articles/computational_algorithm1_files/figure-html/hmm_lowtol_ecdf_ranks-1.png new file mode 100644 index 0000000..c8117d3 Binary files /dev/null and b/docs/articles/computational_algorithm1_files/figure-html/hmm_lowtol_ecdf_ranks-1.png differ diff --git a/docs/articles/computational_algorithm1_files/figure-html/hmm_lowtol_ecdf_ranks-2.png b/docs/articles/computational_algorithm1_files/figure-html/hmm_lowtol_ecdf_ranks-2.png new file mode 100644 index 0000000..1043453 Binary files /dev/null and b/docs/articles/computational_algorithm1_files/figure-html/hmm_lowtol_ecdf_ranks-2.png differ diff --git 
a/docs/articles/computational_algorithm1_files/figure-html/hmm_optimizing_ecdf_ranks-1.png b/docs/articles/computational_algorithm1_files/figure-html/hmm_optimizing_ecdf_ranks-1.png new file mode 100644 index 0000000..3b3b630 Binary files /dev/null and b/docs/articles/computational_algorithm1_files/figure-html/hmm_optimizing_ecdf_ranks-1.png differ diff --git a/docs/articles/computational_algorithm1_files/figure-html/hmm_optimizing_ecdf_ranks-2.png b/docs/articles/computational_algorithm1_files/figure-html/hmm_optimizing_ecdf_ranks-2.png new file mode 100644 index 0000000..a07398e Binary files /dev/null and b/docs/articles/computational_algorithm1_files/figure-html/hmm_optimizing_ecdf_ranks-2.png differ diff --git a/docs/articles/computational_algorithm1_files/figure-html/hmm_ordered_ecdf_ranks-1.png b/docs/articles/computational_algorithm1_files/figure-html/hmm_ordered_ecdf_ranks-1.png new file mode 100644 index 0000000..68b27f6 Binary files /dev/null and b/docs/articles/computational_algorithm1_files/figure-html/hmm_ordered_ecdf_ranks-1.png differ diff --git a/docs/articles/computational_algorithm1_files/figure-html/hmm_ordered_ecdf_ranks-2.png b/docs/articles/computational_algorithm1_files/figure-html/hmm_ordered_ecdf_ranks-2.png new file mode 100644 index 0000000..18cf158 Binary files /dev/null and b/docs/articles/computational_algorithm1_files/figure-html/hmm_ordered_ecdf_ranks-2.png differ diff --git a/docs/articles/computational_algorithm1_files/figure-html/hmm_ordered_fullrank_ecdf_ranks-1.png b/docs/articles/computational_algorithm1_files/figure-html/hmm_ordered_fullrank_ecdf_ranks-1.png new file mode 100644 index 0000000..2a60bfc Binary files /dev/null and b/docs/articles/computational_algorithm1_files/figure-html/hmm_ordered_fullrank_ecdf_ranks-1.png differ diff --git a/docs/articles/computational_algorithm1_files/figure-html/hmm_ordered_fullrank_ecdf_ranks-2.png b/docs/articles/computational_algorithm1_files/figure-html/hmm_ordered_fullrank_ecdf_ranks-2.png new file mode 100644 index 0000000..f7aad01 Binary files /dev/null and b/docs/articles/computational_algorithm1_files/figure-html/hmm_ordered_fullrank_ecdf_ranks-2.png differ diff --git a/docs/articles/computational_algorithm1_files/figure-html/hmm_ordered_optimizing_ecdf_ranks-1.png b/docs/articles/computational_algorithm1_files/figure-html/hmm_ordered_optimizing_ecdf_ranks-1.png new file mode 100644 index 0000000..e7bda69 Binary files /dev/null and b/docs/articles/computational_algorithm1_files/figure-html/hmm_ordered_optimizing_ecdf_ranks-1.png differ diff --git a/docs/articles/computational_algorithm1_files/figure-html/hmm_ordered_optimizing_ecdf_ranks-2.png b/docs/articles/computational_algorithm1_files/figure-html/hmm_ordered_optimizing_ecdf_ranks-2.png new file mode 100644 index 0000000..c4e00a0 Binary files /dev/null and b/docs/articles/computational_algorithm1_files/figure-html/hmm_ordered_optimizing_ecdf_ranks-2.png differ diff --git a/docs/articles/computational_algorithm1_files/figure-html/hmm_sample_ecdf_ranks-1.png b/docs/articles/computational_algorithm1_files/figure-html/hmm_sample_ecdf_ranks-1.png new file mode 100644 index 0000000..27d16ec Binary files /dev/null and b/docs/articles/computational_algorithm1_files/figure-html/hmm_sample_ecdf_ranks-1.png differ diff --git a/docs/articles/computational_algorithm1_files/figure-html/hmm_sample_ecdf_ranks-2.png b/docs/articles/computational_algorithm1_files/figure-html/hmm_sample_ecdf_ranks-2.png new file mode 100644 index 0000000..55d15e0 Binary files /dev/null 
and b/docs/articles/computational_algorithm1_files/figure-html/hmm_sample_ecdf_ranks-2.png differ diff --git a/docs/articles/computational_algorithm1_files/figure-html/hmm_time-1.png b/docs/articles/computational_algorithm1_files/figure-html/hmm_time-1.png new file mode 100644 index 0000000..87f0096 Binary files /dev/null and b/docs/articles/computational_algorithm1_files/figure-html/hmm_time-1.png differ diff --git a/docs/articles/discrete_params.html b/docs/articles/discrete_params.html deleted file mode 100644 index 8f08061..0000000 --- a/docs/articles/discrete_params.html +++ /dev/null @@ -1,383 +0,0 @@ - - - - - - - -SBC with discrete parameters • SBC - - - - - - - - - - -
-
- - - - -
-
- - - - -

SBC was primarily designed for continuous parameters, but can be used with models that have discrete parameters - whether the parameters are directly represented (e.g. in JAGS) or marginalized out (as is usual in Stan).

-
-library(SBC); 
-library(ggplot2)
-
-use_cmdstanr <- TRUE # Set to false to use rstan instead
-
-if(use_cmdstanr) {
-  library(cmdstanr)
-} else {
-  library(rstan)
-}
-
-# Multiprocessing support
-library(future)
-plan(multisession)
-
-# The fits are very fast and we fit just a few, 
-# so we force a minimum chunk size to reduce overhead of
-# paralellization and decrease computation time.
-options(SBC.min_chunk_size = 5)
-
-# Setup caching of results
-cache_dir <- "./discrete_params_SBC_cache"
-if(!dir.exists(cache_dir)) {
-  dir.create(cache_dir)
-}
-

We take the changepoint model from: https://mc-stan.org/docs/2_26/stan-users-guide/change-point-section.html

-
-cat(readLines("stan/discrete_params1.stan"), sep = "\n")
-
data {
-  real<lower=0> r_e;
-  real<lower=0> r_l;
-
-  int<lower=1> T;
-  int<lower=0> y[T];
-}
-transformed data {
-  real log_unif;
-  log_unif = -log(T);
-}
-parameters {
-  real<lower=0> e;
-  real<lower=0> l;
-}
-transformed parameters {
-  vector[T] lp;
-  lp = rep_vector(log_unif, T);
-  for (s in 1:T)
-    for (t in 1:T)
-      lp[s] = lp[s] + poisson_lpmf(y[t] | t < s ? e : l);
-}
-model {
-  e ~ exponential(r_e);
-  l ~ exponential(r_l);
-  target += log_sum_exp(lp);
-}
-
-generated quantities {
-  int<lower=1,upper=T> s;
-  s = categorical_logit_rng(lp);
-}
-
-if(use_cmdstanr) {
-  model_1 <- cmdstan_model("stan/discrete_params1.stan")
-  backend_1 <- SBC_backend_cmdstan_sample(model_1)
-} else {
-  model_1 <- stan_model("stan/discrete_params1.stan")
-  backend_1 <- SBC_backend_rstan_sample(model_1)
-}
-

Now, let’s generate data from the model.

-
-generate_single_dataset_1 <- function(T, r_e, r_l) {
-  e <- rexp(1, r_e)
-  l <- rexp(1, r_l)
-  s <- sample.int(T, size = 1)
-  
-  y <- array(NA_real_, T)
-  for(t in 1:T) {
-    if(t <= s) {
-      rate <- e
-    } else {
-      rate <- l
-    }
-    y[t] <- rpois(1, rate) 
-  }
-  
-  list(
-    parameters = list(
-      e = e, l = l, s = s
-    ), generated = list(
-      T = T,
-      r_e = r_e,
-      r_l = r_l,
-      y = y
-    )
-  )
-}
-
-generator_1 <- SBC_generator_function(generate_single_dataset_1, T = 5, r_e = 0.5, r_l = 0.1)
-
-set.seed(85394672)
-datasets_1 <- generate_datasets(generator_1, 30)
-
-results_1 <- compute_results(datasets_1, backend_1, 
-                    cache_mode = "results", 
-                    cache_location = file.path(cache_dir, "model1"))
-
## Results loaded from cache file 'model1'
-
##  - 7 (23%) fits had at least one Rhat > 1.01. Largest Rhat was NA.
-
##  - 25 (83%) fits had tail ESS undefined or less than half of the maximum rank, potentially skewing 
-## the rank statistics. The lowest tail ESS was NA.
-##  If the fits look good otherwise, increasing `thin_ranks` (via recompute_statistics) 
-## or number of posterior samples (by refitting) might help.
-
##  - 2 (7%) fits had divergent transitions. Maximum number of divergences was 3.
-
## Not all diagnostics are OK.
-## You can learn more by inspecting $default_diagnostics, $backend_diagnostics 
-## and/or investigating $outputs/$messages/$warnings for detailed output from the backend.
-

Here we also use the caching feature to avoid recomputing the fits when recompiling this vignette. In practice, caching is not necessary but is often useful.

-

TODO the diagnostic failures are false positives, because Rhat and ESS don’t work very well for discrete parameters. We need to figure out how to handle this better.

-

We can quickly note that the statistics for the s parameter are extreme - many ranks of 0 and extreme z-scores, including -Infinity. Seing just one or two such fits should be enough to convince us that there is something fundamentally wrong.

-
-dplyr::filter(results_1$stats, parameter == "s") 
-
## # A tibble: 30 x 15
-##    parameter simulated_value  rank   z_score  mean median    sd   mad    q5
-##    <chr>               <dbl> <dbl>     <dbl> <dbl>  <dbl> <dbl> <dbl> <dbl>
-##  1 s                       3   185    0.0182  2.97      3 1.61   2.97     1
-##  2 s                       1    24   -1.90    2.02      2 0.537  0        1
-##  3 s                       4   127   -1.37    4.67      5 0.489  0        4
-##  4 s                       1    10   -2.85    2.86      3 0.651  0        1
-##  5 s                       5   398    2.76    2.86      3 0.775  0        1
-##  6 s                       2   272    0.0449  1.94      1 1.42   0        1
-##  7 s                       3     0 -Inf       4         4 0      0        4
-##  8 s                       2   130   -0.594   2.87      3 1.46   1.48     1
-##  9 s                       2     0   -6.84    2.99      3 0.144  0        3
-## 10 s                       2     3   -8.68    3.00      3 0.115  0        3
-## # ... with 20 more rows, and 6 more variables: q95 <dbl>, rhat <dbl>,
-## #   ess_bulk <dbl>, ess_tail <dbl>, max_rank <int>, dataset_id <int>
-

Inspecting the statistics shows that quite often, the model is quite sure of the value of s while the simulated value is just one less.

-

Looking at the ecdf_diff plot we see that this seems to compromise heavily the inference for s, but the other parameters do not show such bad behaviour.

-
-plot_ecdf_diff(results_1)
-

-
-plot_rank_hist(results_1)
-

-

An important note: you may wonder, how we got such a wiggly line for the s parameter - doesn’t it have just 5 possible values? Shouldn’t therefore the ECDF be one big staircase? In fact the package does a little trick to make discrete parameters comparable to continuous - the rank of a discrete parameter is chosen uniformly randomly across all possible ranks (i.e. posterior draws that have exactly equal value). This means that if the model is well behaved, ranks for the discrete parameter will be uniformly distributed across the whole range of possible ranks and we can use exactly the same diagnostics for a discrete parameter as we do for the continuous ones.

-

But back to the model - what happened? What is wrong with it? After some inspection, you may notice that the simulator does not match the model - the model takes the early rate (e) for points t < s while the simulator takes e for points t <= s, so there is effectively a shift by one time point between the simulator and the model. So let’s assume that we beleive that the Stan model is in fact right. We therefore updated the simulator to match the model:

-
-generate_single_dataset_2 <- function(T, r_e, r_l) {
-  e <- rexp(1, r_e)
-  l <- rexp(1, r_l)
-  s <- sample.int(T, size = 1)
-  
-  y <- array(NA_real_, T)
-  for(t in 1:T) {
-    if(t < s) { ### <--- Only change here
-      rate <- e
-    } else {
-      rate <- l
-    }
-    y[t] <- rpois(1, rate) 
-  }
-  
-  list(
-    parameters = list(
-      e = e, l = l, s = s
-    ), generated = list(
-      T = T,
-      r_e = r_e,
-      r_l = r_l,
-      y = y
-    )
-  )
-}
-
-generator_2 <- SBC_generator_function(generate_single_dataset_2, T = 5, r_e = 0.5, r_l = 0.1)
-

And we can recompute:

-
-set.seed(5846502)
-datasets_2 <- generate_datasets(generator_2, 30)
-results_2 <- compute_results(datasets_2, backend_1, 
-                    cache_mode = "results", 
-                    cache_location = file.path(cache_dir, "model2"))
-
## Results loaded from cache file 'model2'
-
##  - 9 (30%) fits had at least one Rhat > 1.01. Largest Rhat was NA.
-
##  - 26 (87%) fits had tail ESS undefined or less than half of the maximum rank, potentially skewing 
-## the rank statistics. The lowest tail ESS was NA.
-##  If the fits look good otherwise, increasing `thin_ranks` (via recompute_statistics) 
-## or number of posterior samples (by refitting) might help.
-
##  - 2 (7%) fits had divergent transitions. Maximum number of divergences was 2.
-
## Not all diagnostics are OK.
-## You can learn more by inspecting $default_diagnostics, $backend_diagnostics 
-## and/or investigating $outputs/$messages/$warnings for detailed output from the backend.
-
-plot_rank_hist(results_2)
-

-
-plot_ecdf_diff(results_2)
-

-

Looks good, so let us add some more simulations to make sure the model behaves well.

-
-set.seed(54321488)
-datasets_3 <- generate_datasets(generator_2, 100)
-results_3 <- compute_results(datasets_3, backend_1, 
-                    cache_mode = "results", 
-                    cache_location = file.path(cache_dir, "model3"))
-
## Results loaded from cache file 'model3'
-
##  - 19 (19%) fits had at least one Rhat > 1.01. Largest Rhat was NA.
-
##  - 85 (85%) fits had tail ESS undefined or less than half of the maximum rank, potentially skewing 
-## the rank statistics. The lowest tail ESS was NA.
-##  If the fits look good otherwise, increasing `thin_ranks` (via recompute_statistics) 
-## or number of posterior samples (by refitting) might help.
-
##  - 7 (7%) fits had divergent transitions. Maximum number of divergences was 20.
-
## Not all diagnostics are OK.
-## You can learn more by inspecting $default_diagnostics, $backend_diagnostics 
-## and/or investigating $outputs/$messages/$warnings for detailed output from the backend.
-
-results_all <- bind_results(results_2, results_3)
-
-plot_rank_hist(results_all)
-

-
-plot_ecdf_diff(results_all)
-

-

Now - as far as this amount of SBC steps can see, the model is good and we get good behaviour for both the continuous and the discrete parameters.

-
- - - -
- - - -
- -
-

Site built with pkgdown 1.6.1.

-
- -
-
- - - - - - diff --git a/docs/articles/discrete_params_files/figure-html/results1_plots-1.png b/docs/articles/discrete_params_files/figure-html/results1_plots-1.png deleted file mode 100644 index 4e993e7..0000000 Binary files a/docs/articles/discrete_params_files/figure-html/results1_plots-1.png and /dev/null differ diff --git a/docs/articles/discrete_params_files/figure-html/results1_plots-2.png b/docs/articles/discrete_params_files/figure-html/results1_plots-2.png deleted file mode 100644 index 113b7f1..0000000 Binary files a/docs/articles/discrete_params_files/figure-html/results1_plots-2.png and /dev/null differ diff --git a/docs/articles/discrete_params_files/figure-html/results_2_plots-1.png b/docs/articles/discrete_params_files/figure-html/results_2_plots-1.png deleted file mode 100644 index 0b4585e..0000000 Binary files a/docs/articles/discrete_params_files/figure-html/results_2_plots-1.png and /dev/null differ diff --git a/docs/articles/discrete_params_files/figure-html/results_2_plots-2.png b/docs/articles/discrete_params_files/figure-html/results_2_plots-2.png deleted file mode 100644 index 3009b14..0000000 Binary files a/docs/articles/discrete_params_files/figure-html/results_2_plots-2.png and /dev/null differ diff --git a/docs/articles/discrete_params_files/figure-html/results_all_plots-1.png b/docs/articles/discrete_params_files/figure-html/results_all_plots-1.png deleted file mode 100644 index 14fb982..0000000 Binary files a/docs/articles/discrete_params_files/figure-html/results_all_plots-1.png and /dev/null differ diff --git a/docs/articles/discrete_params_files/figure-html/results_all_plots-2.png b/docs/articles/discrete_params_files/figure-html/results_all_plots-2.png deleted file mode 100644 index c4a135b..0000000 Binary files a/docs/articles/discrete_params_files/figure-html/results_all_plots-2.png and /dev/null differ diff --git a/docs/articles/discrete_params_files/header-attrs-2.10/header-attrs.js b/docs/articles/discrete_params_files/header-attrs-2.10/header-attrs.js deleted file mode 100644 index dd57d92..0000000 --- a/docs/articles/discrete_params_files/header-attrs-2.10/header-attrs.js +++ /dev/null @@ -1,12 +0,0 @@ -// Pandoc 2.9 adds attributes on both header and div. We remove the former (to -// be compatible with the behavior of Pandoc < 2.8). -document.addEventListener('DOMContentLoaded', function(e) { - var hs = document.querySelectorAll("div.section[class*='level'] > :first-child"); - var i, h, a; - for (i = 0; i < hs.length; i++) { - h = hs[i]; - if (!/^h[1-6]$/i.test(h.tagName)) continue; // it should be a header h1-h6 - a = h.attributes; - while (a.length > 0) h.removeAttribute(a[0].name); - } -}); diff --git a/docs/articles/discrete_params_files/header-attrs-2.11/header-attrs.js b/docs/articles/discrete_params_files/header-attrs-2.11/header-attrs.js deleted file mode 100644 index dd57d92..0000000 --- a/docs/articles/discrete_params_files/header-attrs-2.11/header-attrs.js +++ /dev/null @@ -1,12 +0,0 @@ -// Pandoc 2.9 adds attributes on both header and div. We remove the former (to -// be compatible with the behavior of Pandoc < 2.8). 
-document.addEventListener('DOMContentLoaded', function(e) { - var hs = document.querySelectorAll("div.section[class*='level'] > :first-child"); - var i, h, a; - for (i = 0; i < hs.length; i++) { - h = hs[i]; - if (!/^h[1-6]$/i.test(h.tagName)) continue; // it should be a header h1-h6 - a = h.attributes; - while (a.length > 0) h.removeAttribute(a[0].name); - } -}); diff --git a/docs/articles/discrete_vars.html b/docs/articles/discrete_vars.html new file mode 100644 index 0000000..1c9a23f --- /dev/null +++ b/docs/articles/discrete_vars.html @@ -0,0 +1,536 @@ + + + + + + + +SBC with discrete parameters in Stan and JAGS • SBC + + + + + + + + + + + + +
+
+ + + + +
+
+ + + + +

SBC was primarily designed for continuous parameters, but can be used with models that have discrete parameters - whether the parameters are directly represented (e.g. in BUGS/JAGS) or marginalized out (as is usual in Stan).

+
+

Stan version and debugging +

+
+library(SBC); 
+library(ggplot2)
+
+use_cmdstanr <- getOption("SBC.vignettes_cmdstanr", TRUE) # Set to false to use rstan instead
+
+if(use_cmdstanr) {
+  library(cmdstanr)
+} else {
+  library(rstan)
+  rstan_options(auto_write = TRUE)
+}
+
+# Multiprocessing support
+library(future)
+plan(multisession)
+
+# The fits are very fast and we fit just a few, 
+# so we force a minimum chunk size to reduce overhead of
+# paralellization and decrease computation time.
+options(SBC.min_chunk_size = 5)
+
+# Setup caching of results
+if(use_cmdstanr) {
+  cache_dir <- "./_discrete_vars_SBC_cache"
+} else {
+  cache_dir <- "./_discrete_vars_rstan_SBC_cache"
+}
+if(!dir.exists(cache_dir)) {
+  dir.create(cache_dir)
+}
+

We take the changepoint model from: https://mc-stan.org/docs/2_26/stan-users-guide/change-point-section.html

+
+cat(readLines("stan/discrete_vars1.stan"), sep = "\n")
+
data {
+  real<lower=0> r_e;
+  real<lower=0> r_l;
+
+  int<lower=1> T;
+  int<lower=0> y[T];
+}
+transformed data {
+  real log_unif;
+  log_unif = -log(T);
+}
+parameters {
+  real<lower=0> e;
+  real<lower=0> l;
+}
+transformed parameters {
+  vector[T] lp;
+  lp = rep_vector(log_unif, T);
+  for (s in 1:T)
+    for (t in 1:T)
+      lp[s] = lp[s] + poisson_lpmf(y[t] | t < s ? e : l);
+}
+model {
+  e ~ exponential(r_e);
+  l ~ exponential(r_l);
+  target += log_sum_exp(lp);
+}
+
+generated quantities {
+  int<lower=1,upper=T> s;
+  s = categorical_logit_rng(lp);
+}
+
+if(use_cmdstanr) {
+  model_1 <- cmdstan_model("stan/discrete_vars1.stan")
+  backend_1 <- SBC_backend_cmdstan_sample(model_1)
+} else {
+  model_1 <- stan_model("stan/discrete_vars1.stan")
+  backend_1 <- SBC_backend_rstan_sample(model_1)
+}
+

Now, let’s generate data from the model.

+
+generate_single_sim_1 <- function(T, r_e, r_l) {
+  e <- rexp(1, r_e)
+  l <- rexp(1, r_l)
+  s <- sample.int(T, size = 1)
+  
+  y <- array(NA_real_, T)
+  for(t in 1:T) {
+    if(t <= s) {
+      rate <- e
+    } else {
+      rate <- l
+    }
+    y[t] <- rpois(1, rate) 
+  }
+  
+  list(
+    variables = list(
+      e = e, l = l, s = s
+    ), generated = list(
+      T = T,
+      r_e = r_e,
+      r_l = r_l,
+      y = y
+    )
+  )
+}
+
+generator_1 <- SBC_generator_function(generate_single_sim_1, T = 5, r_e = 0.5, r_l = 0.1)
+
+set.seed(85394672)
+datasets_1 <- generate_datasets(generator_1, 30)
+

Additionally, we’ll add a generated quantity expressing the total log-likelihood of the data given the fitted parameters. The expression within the generated_quantities() call is evaluated for both prior and posterior draws and included as another variable in SBC checks. It turns out this type of generated quantity can increase the sensitivity of SBC to some issues in the model. See vignette("limits_of_SBC") for a more detailed discussion of this.

+
+log_lik_gq <- generated_quantities(log_lik = sum(dpois(y, ifelse(1:T < s, e, l), log = TRUE)))
+

So finally, let’s actually compute SBC:

+
+results_1 <- compute_SBC(datasets_1, backend_1, 
+                    cache_mode = "results", 
+                    cache_location = file.path(cache_dir, "model1"),
+                    gen_quants = log_lik_gq)
+
## Results loaded from cache file 'model1'
+
##  - 4 (13%) fits had at least one Rhat > 1.01. Largest Rhat was NA.
+
##  - 20 (67%) fits had tail ESS undefined or less than half of the maximum rank, potentially skewing 
+## the rank statistics. The lowest tail ESS was NA.
+##  If the fits look good otherwise, increasing `thin_ranks` (via recompute_SBC_statistics) 
+## or number of posterior draws (by refitting) might help.
+
##  - 2 (7%) fits had divergent transitions. Maximum number of divergences was 3.
+
## Not all diagnostics are OK.
+## You can learn more by inspecting $default_diagnostics, $backend_diagnostics 
+## and/or investigating $outputs/$messages/$warnings for detailed output from the backend.
+

Here we also use the caching feature to avoid recomputing the fits when recompiling this vignette. In practice, caching is not necessary but is often useful.

+

TODO the diagnostic failures are false positives, because Rhat and ESS don’t work very well for discrete parameters. We need to figure out how to handle this better.

+

We can quickly note that the statistics for the s parameter are extreme - many ranks of 0 and extreme z-scores, including -Infinity. Seeing just one or two such fits should be enough to convince us that there is something fundamentally wrong.

+
+dplyr::filter(results_1$stats, variable == "s") 
+
## # A tibble: 30 x 15
+##    sim_id variable simulated_value  rank   z_score  mean median    sd   mad
+##     <int> <chr>              <dbl> <dbl>     <dbl> <dbl>  <dbl> <dbl> <dbl>
+##  1      1 s                      3   186    0.0200  2.97      3 1.59   2.97
+##  2      2 s                      1    24   -1.90    2.02      2 0.537  0   
+##  3      3 s                      4   126   -1.37    4.67      5 0.489  0   
+##  4      4 s                      1    10   -2.85    2.86      3 0.651  0   
+##  5      5 s                      5   397    2.76    2.86      3 0.775  0   
+##  6      6 s                      2   290    0.118   1.84      1 1.35   0   
+##  7      7 s                      3     0 -Inf       4         4 0      0   
+##  8      8 s                      2   129   -0.594   2.87      3 1.46   1.48
+##  9      9 s                      2     0   -6.84    2.99      3 0.144  0   
+## 10     10 s                      2     3   -8.68    3.00      3 0.115  0   
+## # ... with 20 more rows, and 6 more variables: q5 <dbl>, q95 <dbl>, rhat <dbl>,
+## #   ess_bulk <dbl>, ess_tail <dbl>, max_rank <int>
+

Inspecting the statistics shows that, quite often, the model is very sure of the value of s while the simulated value is just one less.

+

Looking at the ecdf_diff plot we see that this seems to heavily compromise the inference for s, but the other parameters do not show such bad behaviour. Note that the log_lik generated quantity shows an even starker failure than s, so it indeed poses a stricter check in this scenario.

+
+plot_ecdf_diff(results_1)
+

+
+plot_rank_hist(results_1)
+

+

An important note: you may wonder how we got such a wiggly line for the s parameter - doesn’t it have just 5 possible values? Shouldn’t the ECDF therefore be one big staircase? In fact the package does a little trick to make discrete parameters comparable to continuous ones - the rank of a discrete parameter is chosen uniformly at random among all possible ranks (i.e. among the posterior draws that have exactly the same value). This means that if the model is well behaved, ranks for the discrete parameter will be uniformly distributed across the whole range of possible ranks and we can use exactly the same diagnostics for a discrete parameter as we do for the continuous ones.
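This tie-breaking trick can be illustrated with a tiny R sketch (illustrative only; the package’s actual implementation may differ in details):

# A discrete variable: simulated (prior) value and a few posterior draws
simulated_s <- 3
posterior_s <- c(2, 3, 3, 3, 4, 3, 2, 3, 3, 4)

# Without ties the rank would be sum(posterior_s < simulated_s); with ties we
# choose uniformly among all ranks spanned by draws equal to the simulated value
n_less  <- sum(posterior_s < simulated_s)
n_equal <- sum(posterior_s == simulated_s)
rank_s  <- n_less + sample(0:n_equal, 1)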

+

But back to the model - what happened? What is wrong with it? After some inspection, you may notice that the simulator does not match the model - the model takes the early rate (e) for points t < s while the simulator takes e for points t <= s, so there is effectively a shift by one time point between the simulator and the model. So let’s assume that we believe that the Stan model is in fact right. We therefore update the simulator to match the model:

+
+generate_single_sim_2 <- function(T, r_e, r_l) {
+  e <- rexp(1, r_e)
+  l <- rexp(1, r_l)
+  s <- sample.int(T, size = 1)
+  
+  y <- array(NA_real_, T)
+  for(t in 1:T) {
+    if(t < s) { ### <--- Only change here
+      rate <- e
+    } else {
+      rate <- l
+    }
+    y[t] <- rpois(1, rate) 
+  }
+  
+  list(
+    variables = list(
+      e = e, l = l, s = s
+    ), generated = list(
+      T = T,
+      r_e = r_e,
+      r_l = r_l,
+      y = y
+    )
+  )
+}
+
+generator_2 <- SBC_generator_function(generate_single_sim_2, T = 5, r_e = 0.5, r_l = 0.1)
+

And we can recompute:

+
+set.seed(5846502)
+datasets_2 <- generate_datasets(generator_2, 30)
+results_2 <- compute_SBC(datasets_2, backend_1,
+                    gen_quants = log_lik_gq, 
+                    cache_mode = "results", 
+                    cache_location = file.path(cache_dir, "model2"))
+
## Results loaded from cache file 'model2'
+
##  - 8 (27%) fits had at least one Rhat > 1.01. Largest Rhat was NA.
+
##  - 24 (80%) fits had tail ESS undefined or less than half of the maximum rank, potentially skewing 
+## the rank statistics. The lowest tail ESS was NA.
+##  If the fits look good otherwise, increasing `thin_ranks` (via recompute_SBC_statistics) 
+## or number of posterior draws (by refitting) might help.
+
##  - 2 (7%) fits had divergent transitions. Maximum number of divergences was 2.
+
## Not all diagnostics are OK.
+## You can learn more by inspecting $default_diagnostics, $backend_diagnostics 
+## and/or investigating $outputs/$messages/$warnings for detailed output from the backend.
+
+plot_rank_hist(results_2)
+

+
+plot_ecdf_diff(results_2)
+

+

Looks good, so let us add some more simulations to make sure the model behaves well.

+
+set.seed(54321488)
+datasets_2_more <- generate_datasets(generator_2, 100)
+results_2_more <- compute_SBC(datasets_2_more, backend_1,
+                    gen_quants = log_lik_gq, 
+                    cache_mode = "results", 
+                    cache_location = file.path(cache_dir, "model3"))
+
## Results loaded from cache file 'model3'
+
##  - 16 (16%) fits had at least one Rhat > 1.01. Largest Rhat was NA.
+
##  - 73 (73%) fits had tail ESS undefined or less than half of the maximum rank, potentially skewing 
+## the rank statistics. The lowest tail ESS was NA.
+##  If the fits look good otherwise, increasing `thin_ranks` (via recompute_SBC_statistics) 
+## or number of posterior draws (by refitting) might help.
+
##  - 8 (8%) fits had divergent transitions. Maximum number of divergences was 20.
+
## Not all diagnostics are OK.
+## You can learn more by inspecting $default_diagnostics, $backend_diagnostics 
+## and/or investigating $outputs/$messages/$warnings for detailed output from the backend.
+
+results_2_all <- bind_results(results_2, results_2_more)
+
+plot_rank_hist(results_2_all)
+

+
+plot_ecdf_diff(results_2_all)
+

+

Now - as far as this number of SBC simulations can tell, the model is good and we get good behaviour for the continuous parameters, the discrete parameter and the log_lik generated quantity. Hooray!

+
+
+

JAGS version +

+

We can now write the same model in JAGS. This becomes a bit easier as JAGS lets us represent discrete parameters directly:

+
+cat(readLines("other_models/changepoint.jags"), sep = "\n")
+
data {
+  for(i in 1:T) {
+    prior_s[i] = 1.0/T
+  }
+}
+
+model {
+  e ~ dexp(r_e);
+  l ~ dexp(r_l);
+  s ~ dcat(prior_s)
+  for(i in 1:T) {
+      y[i] ~ dpois(ifelse(i < s, e, l))
+  }
+}
+

We will use the rjags package - and a relatively large number of samples, as we can expect some autocorrelation in the Gibbs sampler.

+
+backend_jags <- SBC_backend_rjags("other_models/changepoint.jags",
+                                  variable.names = c("e","l","s"),
+                                  n.iter = 10000,
+                                  n.burnin = 1000,
+                                  n.chains = 4,
+                                  thin = 10)
+

Running SBC with all the corrected datasets from before (rJAGS accepts input data in exactly the same format as Stan, so we can reuse the datasets without any change):

+
+datasets_2_all <- bind_datasets(datasets_2, datasets_2_more)
+results_jags <- compute_SBC(datasets_2_all, backend_jags,
+                            gen_quants = log_lik_gq,
+                        cache_mode = "results",
+                        cache_location = file.path(cache_dir, "rjags"))
+
## Results loaded from cache file 'rjags'
+
##  - 19 (15%) fits had at least one Rhat > 1.01. Largest Rhat was NA.
+
##  - 95 (73%) fits had tail ESS undefined or less than half of the maximum rank, potentially skewing 
+## the rank statistics. The lowest tail ESS was NA.
+##  If the fits look good otherwise, increasing `thin_ranks` (via recompute_SBC_statistics) 
+## or number of posterior draws (by refitting) might help.
+
## Not all diagnostics are OK.
+## You can learn more by inspecting $default_diagnostics, $backend_diagnostics 
+## and/or investigating $outputs/$messages/$warnings for detailed output from the backend.
+

Similarly to the case above, the Rhat and ESS warnings are false positives due to the s parameter, which we need to handle better.
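If we want to double-check that the flagged diagnostics indeed come from s, we can summarise the per-variable statistics while excluding it, for example (a sketch, assuming the $stats layout shown earlier in this vignette):

stats_no_s <- dplyr::filter(results_jags$stats, variable != "s")
max(stats_no_s$rhat, na.rm = TRUE)
min(stats_no_s$ess_tail, na.rm = TRUE)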

+

The rank plots show no problems.

+
+plot_rank_hist(results_jags)
+

+
+plot_ecdf_diff(results_jags)
+

+

As an exercise, we can also write a marginalized version of the model in JAGS. In some cases, marginalization improves performance even for JAGS models; for this model, however, it is actually not an improvement, presumably because the model is very simple.

+
+cat(readLines("other_models/changepoint_marginalized.jags"), sep = "\n")
+
data {
+  for(i in 1:T) {
+    prior_unif[i] = -log(T)
+  }
+
+  # Using the zeroes crossing trick to compute the likelihood
+  # See e.g. https://667-per-cm.net/2014/02/17/the-zero-crossings-trick-for-jags-finding-roots-stochastically/
+  z = 0
+}
+
+model {
+  e ~ dexp(r_e);
+  l ~ dexp(r_l);
+
+  # Prepare the zero trick
+  z ~ dpois(z_mean)
+
+  # Compute the likelihood
+  # The lp is a matrix to avoid having to redefine nodes
+  lp[1, 1:T] = prior_unif
+  for (s in 1:T) {
+    for (t in 1:T) {
+      lp[1 + t, s] = lp[t, s] + log(ifelse(t < s, e, l)) * y[t] - ifelse(t < s, e, l)
+    }
+    p[s] = exp(lp[T + 1, s])
+  }
+
+  # log-sum-exp to compute the log likelihood in a numerically stable way
+  m = max(lp[T + 1, ])
+  sum_exp_rest[1] = 0
+  for(t in 1:T) {
+    sum_exp_rest[1 + t] = sum_exp_rest[t] + exp(lp[T + 1, t] - m)
+  }
+  lp_total = m + log(sum_exp_rest[T + 1])
+
+  # We have the likelihood now add it to z_mean for the zeros trick
+  z_mean = -lp_total + 10000
+
+  s ~ dcat(p)
+}
+
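The log-sum-exp step above is just the standard numerically stable identity log(sum(exp(x))) = m + log(sum(exp(x - m))) with m = max(x); a quick R illustration (not part of the vignette code):

lp_example <- c(-1050, -1047.3, -1052.1)   # very negative log probabilities
m <- max(lp_example)
m + log(sum(exp(lp_example - m)))          # stable: finite answer
log(sum(exp(lp_example)))                  # naive: underflows to -Inf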

The code got quite a bit more complex, so let’s check that we didn’t mess up the rewrite - first we build a backend with this new representation:

+
+backend_jags_marginalized <- SBC_backend_rjags("other_models/changepoint_marginalized.jags",
+                                  variable.names = c("e","l","s"),
+                                  n.iter = 10000,
+                                  n.burnin = 1000,
+                                  n.chains = 4,
+                                  thin = 10)
+

Then we run the actual SBC:

+
+results_jags_marginalized <- compute_SBC(datasets_2_all, backend_jags_marginalized,
+                                         gen_quants = log_lik_gq,
+                        cache_mode = "results",
+                        cache_location = file.path(cache_dir, "rjags_marginalized"))
+
## Results loaded from cache file 'rjags_marginalized'
+
##  - 24 (18%) fits had at least one Rhat > 1.01. Largest Rhat was NA.
+
##  - 95 (73%) fits had tail ESS undefined or less than half of the maximum rank, potentially skewing 
+## the rank statistics. The lowest tail ESS was NA.
+##  If the fits look good otherwise, increasing `thin_ranks` (via recompute_SBC_statistics) 
+## or number of posterior draws (by refitting) might help.
+
## Not all diagnostics are OK.
+## You can learn more by inspecting $default_diagnostics, $backend_diagnostics 
+## and/or investigating $outputs/$messages/$warnings for detailed output from the backend.
+

And the rank plots look good, so we most likely did succeed in correctly marginalizing the s parameter!

+
+plot_rank_hist(results_jags_marginalized)
+

+
+plot_ecdf_diff(results_jags_marginalized)
+

+
+
+ + + +
+ + + +
+ +
+

+

Site built with pkgdown 2.0.1.

+
+ +
+
+ + + + + + + + diff --git a/docs/articles/discrete_vars_files/figure-html/ranks_jags-1.png b/docs/articles/discrete_vars_files/figure-html/ranks_jags-1.png new file mode 100644 index 0000000..e5e580b Binary files /dev/null and b/docs/articles/discrete_vars_files/figure-html/ranks_jags-1.png differ diff --git a/docs/articles/discrete_vars_files/figure-html/ranks_jags-2.png b/docs/articles/discrete_vars_files/figure-html/ranks_jags-2.png new file mode 100644 index 0000000..2b8ebc6 Binary files /dev/null and b/docs/articles/discrete_vars_files/figure-html/ranks_jags-2.png differ diff --git a/docs/articles/discrete_vars_files/figure-html/ranks_jags_marginalized-1.png b/docs/articles/discrete_vars_files/figure-html/ranks_jags_marginalized-1.png new file mode 100644 index 0000000..0f2a15d Binary files /dev/null and b/docs/articles/discrete_vars_files/figure-html/ranks_jags_marginalized-1.png differ diff --git a/docs/articles/discrete_vars_files/figure-html/ranks_jags_marginalized-2.png b/docs/articles/discrete_vars_files/figure-html/ranks_jags_marginalized-2.png new file mode 100644 index 0000000..697d56f Binary files /dev/null and b/docs/articles/discrete_vars_files/figure-html/ranks_jags_marginalized-2.png differ diff --git a/docs/articles/discrete_vars_files/figure-html/results1_plots-1.png b/docs/articles/discrete_vars_files/figure-html/results1_plots-1.png new file mode 100644 index 0000000..bc8653f Binary files /dev/null and b/docs/articles/discrete_vars_files/figure-html/results1_plots-1.png differ diff --git a/docs/articles/discrete_vars_files/figure-html/results1_plots-2.png b/docs/articles/discrete_vars_files/figure-html/results1_plots-2.png new file mode 100644 index 0000000..47a5397 Binary files /dev/null and b/docs/articles/discrete_vars_files/figure-html/results1_plots-2.png differ diff --git a/docs/articles/discrete_vars_files/figure-html/results_2_all_plots-1.png b/docs/articles/discrete_vars_files/figure-html/results_2_all_plots-1.png new file mode 100644 index 0000000..2e04395 Binary files /dev/null and b/docs/articles/discrete_vars_files/figure-html/results_2_all_plots-1.png differ diff --git a/docs/articles/discrete_vars_files/figure-html/results_2_all_plots-2.png b/docs/articles/discrete_vars_files/figure-html/results_2_all_plots-2.png new file mode 100644 index 0000000..d4be555 Binary files /dev/null and b/docs/articles/discrete_vars_files/figure-html/results_2_all_plots-2.png differ diff --git a/docs/articles/discrete_vars_files/figure-html/results_2_plots-1.png b/docs/articles/discrete_vars_files/figure-html/results_2_plots-1.png new file mode 100644 index 0000000..7015412 Binary files /dev/null and b/docs/articles/discrete_vars_files/figure-html/results_2_plots-1.png differ diff --git a/docs/articles/discrete_vars_files/figure-html/results_2_plots-2.png b/docs/articles/discrete_vars_files/figure-html/results_2_plots-2.png new file mode 100644 index 0000000..064d2e7 Binary files /dev/null and b/docs/articles/discrete_vars_files/figure-html/results_2_plots-2.png differ diff --git a/docs/articles/implementing_backends.html b/docs/articles/implementing_backends.html index 28e4b51..ffd2483 100644 --- a/docs/articles/implementing_backends.html +++ b/docs/articles/implementing_backends.html @@ -19,6 +19,8 @@ + +
+
-

This vignette will discuss how to implement a new type of backend for the SBC package and thus allow you to integrate the SBC package with any method/algorithm that can produce samples. As an example, we’ll wrap the base R glm function as a backend. This will also let us discuss how we can treat frequentist models as approximations to Bayesian models and that SBC can tell us how faithful such an approximation is.

-

We assume familiarity with the SBC package architecture as discussed in the basic usage vignette and S3 classes in R.

+

This vignette will discuss how to implement a new type of backend for the SBC package and thus allow you to integrate the SBC package with any method/algorithm that can produce draws from a posterior distribution or its approximation. As an example, we’ll wrap the base R glm function as a backend. This will also let us discuss how we can treat frequentist models as approximations to Bayesian models and that SBC can tell us how faithful such an approximation is.

+

We assume familiarity with the SBC package architecture as discussed in the basic usage vignette and S3 classes in R.

Let’s setup the environment.

-library(SBC)
-library(ggplot2)
-library(medicaldata)
-library(formula.tools)
-library(MASS)
+library(SBC)
+library(ggplot2)
+library(medicaldata)
+library(formula.tools)
+library(MASS)
 
 # Setup caching of results
-cache_dir <- "./implementing_backends_SBC_cache"
-if(!dir.exists(cache_dir)) {
-  dir.create(cache_dir)
+cache_dir <- "./_implementing_backends_SBC_cache"
+if(!dir.exists(cache_dir)) {
+  dir.create(cache_dir)
 }
-
-

-Minimal backend support

-

If you remember from the, interface introduction vignette a backend for the SBC package describes a function that takes in data and produces samples, i.e. the backend holds all the information other than data that are needed to run the given statistical method.

-

For practical reasons, the SBC package actually splits that function into two steps: first, there is an S3 generic SBC_fit(), that takes a backend object, dataset and the number of cores it is allowed to use and produces an arbitrary object representing the fit. Additionally, there is an SBC_fit_to_draws_matrix() S3 generic that takes in the resulting fit and returns posterior samples in the posterior::draws_matrix format. The split here is useful because it lets the SBC_results object to store the raw fit objects, which can then be inspected by user for debugging purposes. The SBC package makes no assumptions on the fit objects beyond the fact that they define an SBC_fit_to_draws_matrix method. To be precise, even this is not necessary, because if the object implements a method for as_draws_matrix but not SBC_fit_to_draws_matrix, the as_draws_matrix implementation will be called.

+
+

Minimal backend support +

+

If you remember from the interface introduction vignette, a backend for the SBC package describes a function that takes in data and produces posterior draws, i.e. the backend holds all the information other than data that is needed to run the given statistical method.

+

For practical reasons, the SBC package actually splits that function into two steps: first, there is an S3 generic SBC_fit() that takes a backend object, observed data and the number of cores it is allowed to use, and produces an arbitrary object representing the fit. Additionally, there is an SBC_fit_to_draws_matrix() S3 generic that takes in the resulting fit and returns posterior draws in the posterior::draws_matrix format. The split is useful because it lets the SBC_results object store the raw fit objects, which can then be inspected by the user for debugging purposes. The SBC package makes no assumptions about the fit objects beyond the fact that they define an SBC_fit_to_draws_matrix method. To be precise, even this is not necessary, because if the object implements a method for as_draws_matrix but not SBC_fit_to_draws_matrix, the as_draws_matrix implementation will be called.

So a simple implementation wrapping the glm function will consist of three short functions. First is a “constructor” function that creates a new instance of the backend. Here, we’ll just capture all the arguments (which we will later pass to glm); the created object will be of a new S3 class SBC_backend_glm:

 SBC_backend_glm <- function(...) {
-    args = list(...)
-    if(any(names(args) == "data")) {
-      stop(paste0("Parameter 'data' cannot be provided when defining a backend",
+    args = list(...)
+    if(any(names(args) == "data")) {
+      stop(paste0("Argument 'data' cannot be provided when defining a backend",
                   " as it needs to be set by the SBC package"))
     }
   
-    structure(list(args = args), class = "SBC_backend_glm")
+    structure(list(args = args), class = "SBC_backend_glm")
 }

So e.g. SBC_backend_glm(y ~ x, family = "poisson") would create a valid backend representing a simple Poisson regression.

-

Now we create an implementation of SBC_fit for the newly created class. We take the generated dataset (generated parameter) and pass it - along with all the arguments we stored in the constructor - to glm via do.call. We ignore the cores argument as we don’t have multicore support.

+

Now we create an implementation of SBC_fit for the newly created class. We take the generated data (generated argument) and pass it - along with all the arguments we stored in the constructor - to glm via do.call. We ignore the cores argument as we don’t have multicore support.

 SBC_fit.SBC_backend_glm <- function(backend, generated, cores) {
   args_with_data <- backend$args
   args_with_data$data <- generated
   
-  do.call(glm, args_with_data)
+  do.call(glm, args_with_data)
 }
-

In some cases, it might be undesirable to work with the raw fit as returned by the underlying package and wrap the result in some custom class, but here we’ll have no trouble working with the glm S3 class (as returned by the glm() function) directly.

+

In some cases, it might be undesirable to work with the raw fit as returned by the underlying package, and you may instead want to wrap the result in a custom class, but here we’ll have no trouble working with the glm S3 class (as returned by the glm() function) directly.

The most conceptually difficult part is then the implementation of SBC_fit_to_draws_matrix for the glm S3 class. Here, we’ll recall how glm actually fits the model: it finds the maximum likelihood estimate (MLE) and then uses the Hessian to construct a multivariate normal approximation to the likelihood around the MLE. In this way we can see glm as an approximate Bayesian method where:

  • A flat improper prior is used for all parameters
  • The posterior is approximated by a multivariate normal distribution
-

And that’s exactly what we’ll do: the coef method for glm fit returns the MLE and the vcov method returns the variance-covariance matrix implied by the Hessian, so all we need is to take a bunch of samples (here 1000) from this multivariate normal. Therefore, the implementation is also very simple:

+

And that’s exactly what we’ll do: the coef method for a glm fit returns the MLE and the vcov method returns the variance-covariance matrix implied by the Hessian, so all we need is to take a bunch of draws (here 1000) from this multivariate normal. Therefore, the implementation is also very simple:

 SBC_fit_to_draws_matrix.glm <- function(fit) {
-  samp_matrix <- MASS::mvrnorm(n = 1000, mu = coef(fit), Sigma = vcov(fit))
-  posterior::as_draws_matrix(samp_matrix)
+  samp_matrix <- MASS::mvrnorm(n = 1000, mu = coef(fit), Sigma = vcov(fit))
+  posterior::as_draws_matrix(samp_matrix)
 }

Note that for non-base packages, we specify the namespace explicitly and do not rely on those functions being loaded via library - this is good practice for package development and will make parallelization work (see notes below).

A quick example to show this minimal setup already works. We’ll build a simple Poisson regression simulator:

 generator_single_poisson <- function(N) {
-  log_intercept <- rnorm(1, mean = 4,  sd = 1.5)
-  beta <- rnorm(1, mean = 0, sd = 1)
-  X <- rnorm(N, mean = 0, sd = 1)
+  log_intercept <- rnorm(1, mean = 4,  sd = 1.5)
+  beta <- rnorm(1, mean = 0, sd = 1)
+  X <- rnorm(N, mean = 0, sd = 1)
   mus <- log_intercept + beta * X
-  y <- rpois(N, exp(mus))
+  y <- rpois(N, exp(mus))
   
-  list(
-    parameters = list(
-      # Naming the parameters in the same way glm will name coefs
+  list(
+    variables = list(
+      # Naming the variables in the same way glm will name coefs
       `(Intercept)` = log_intercept,
       x = beta
     ),
-    generated = data.frame(y = y, x = X)
+    generated = data.frame(y = y, x = X)
   )
 }
 
-set.seed(354662)
+set.seed(354662)
 datasets_poisson <- generate_datasets(SBC_generator_function(generator_single_poisson, N = 100), 
-                              n_datasets = 100)
+ n_sims = 100)

Then we’ll construct a matching backend and compute the results.

 backend_poisson <- SBC_backend_glm(y ~ x, family = "poisson")
-res_poisson <- compute_results(datasets_poisson, 
+res_poisson <- compute_SBC(datasets_poisson, 
                                backend_poisson, 
                                thin_ranks = 1,
                                cache_mode = "results", 
-                               cache_location = file.path(cache_dir,"poisson"))
-
## Results loaded from cache file 'poisson'
-

We have set thin_ranks = 1 as no thinning is needed (the samples are i.i.d. by construction).

+ cache_location = file.path(cache_dir,"poisson"))
+
## Results loaded from cache file 'poisson'
+

We have set thin_ranks = 1 as no thinning is needed (the draws are i.i.d. by construction).

The rank and ecdf plots show no big problems

 plot_rank_hist(res_poisson)

-plot_ecdf_diff(res_poisson)
+plot_ecdf_diff(res_poisson)

-

This is not unexpected - we’ve used a large dataset and a simple model, so choice of prior should have negligible impact on the posterior and the normal approximation is very close to the exact Bayesian solution.

-

We can see that both model parameters are recovered almost exactly in almost all fits:

+

This is not unexpected - we’ve used a large number of observations and a simple model, so choice of prior should have negligible impact on the posterior and the normal approximation is very close to the exact Bayesian solution.

+

We can see that both variables are recovered almost exactly in almost all fits:

 plot_sim_estimated(res_poisson)

-
-

-Additional backend improvements

+
+

Additional backend improvements +

Beyond the minimal setup, there are additional functions that our backend can implement to make it more comfortable to use. Let’s walk through the options and see how they can be implemented for the glm wrapper.

-

Since (unlike MCMC methods) the glm approximation does not produce autocorrelated samples, we can implement SBC_backend_iid_samples to return TRUE. The SBC package will then by default use thin_ranks = 1 argument to compute_results and will not assess convergence/autocorrelation via the R-hat and ESS diagnostics.

+

Since (unlike MCMC methods) the glm approximation does not produce autocorrelated draws, we can implement SBC_backend_iid_draws to return TRUE. The SBC package will then by default use the thin_ranks = 1 argument to compute_SBC and will not assess convergence/autocorrelation via the R-hat and ESS diagnostics.

-SBC_backend_iid_samples.SBC_backend_glm <- function(backend) {
+SBC_backend_iid_draws.SBC_backend_glm <- function(backend) {
   TRUE
 }

Backends based on MCMC may want to implement SBC_backend_default_thin_ranks(), which specifies the default thinning needed to remove autocorrelation from the fit.
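For illustration, a hypothetical MCMC-based backend could implement it roughly like this (a sketch - the class name SBC_backend_my_mcmc and the value 10 are made-up placeholders, not recommendations):

SBC_backend_default_thin_ranks.SBC_backend_my_mcmc <- function(backend) {
  # Keep every 10th draw when computing ranks so that the retained draws
  # are approximately independent despite the autocorrelation of the chains.
  10
}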

Most statistical algorithms also give us some diagnostics that let us understand whether there was some problem in fitting.

To see some of the problems that glm can encounter, we’ll run a quite pathological logistic regression:

-problematic_data <- data.frame(y = rep(0:1, each = 100), x = 1:200)
-glm(y ~ x, data = problematic_data, family = "binomial")
-
## Warning: glm.fit: algorithm did not converge
-
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
-
## 
-## Call:  glm(formula = y ~ x, family = "binomial", data = problematic_data)
-## 
-## Coefficients:
-## (Intercept)            x  
-##    -3006.52        29.92  
-## 
-## Degrees of Freedom: 199 Total (i.e. Null);  198 Residual
-## Null Deviance:       277.3 
-## Residual Deviance: 1.276e-06     AIC: 4
+problematic_data <- data.frame(y = rep(0:1, each = 100), x = 1:200)
+glm(y ~ x, data = problematic_data, family = "binomial")
+
## Warning: glm.fit: algorithm did not converge
+
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
+
## 
+## Call:  glm(formula = y ~ x, family = "binomial", data = problematic_data)
+## 
+## Coefficients:
+## (Intercept)            x  
+##    -3006.52        29.92  
+## 
+## Degrees of Freedom: 199 Total (i.e. Null);  198 Residual
+## Null Deviance:       277.3 
+## Residual Deviance: 1.276e-06     AIC: 4

So we get two problems - one is that the optimization method did not converge (this can be checked via the $converged member of the resulting fit), and the other is that extreme probabilities occurred (this is related to the problem of “perfect separation”). The latter problem is, however, only signalled as a warning and never reported in the fit object itself.

For this reason, the SBC package will capture all output, warnings and messages that the backend produces, and our implementation can then inspect and process that output. This can be achieved by implementing the SBC_fit_to_diagnostics() generic for our fit object. This method should return a single-row data.frame that contains any diagnostics. Additionally, it is often useful to add a custom class to the results to allow for automatic summaries (we’ll get there). This is our implementation for the glm class:

 SBC_fit_to_diagnostics.glm <- function(fit, fit_output, fit_messages, fit_warnings) {
-  res <- data.frame(
-    probs_0_1 = any(grepl("fitted probabilities numerically 0 or 1 occurred", fit_warnings)),
+  res <- data.frame(
+    probs_0_1 = any(grepl("fitted probabilities numerically 0 or 1 occurred", fit_warnings)),
     converged = fit$converged
   )
 
-  class(res) <- c("SBC_glm_diagnostics", class(res))
+  class(res) <- c("SBC_glm_diagnostics", class(res))
   res
 }

Having a custom class lets us implement a summary method for our diagnostics:

 summary.SBC_glm_diagnostics <- function(x) {
-  summ <- list(
-    n_fits = nrow(x),
-    n_probs_0_1 = sum(x$probs_0_1),
-    n_not_converged = sum(!x$converged)
+  summ <- list(
+    n_fits = nrow(x),
+    n_probs_0_1 = sum(x$probs_0_1),
+    n_not_converged = sum(!x$converged)
   )
 
-  structure(summ, class = "SBC_glm_diagnostics_summary")  
+  structure(summ, class = "SBC_glm_diagnostics_summary")  
 }

and we can then implement the get_diagnostic_messages() generic, which takes an object and returns a list of messages potentially signalling problems - any messages that signal problems are then reported automatically to the user. The OK messages are also reported when calling summary on an SBC_results object.

We’ll use our summary implementation for SBC_glm_diagnostics to create the messages:

 get_diagnostic_messages.SBC_glm_diagnostics <- function(x) {
-  get_diagnostic_messages(summary(x))
+  get_diagnostic_messages(summary(x))
 }
 
 
 get_diagnostic_messages.SBC_glm_diagnostics_summary <- function(x) {
-  message_list <- list()
+  message_list <- list()
   
   if(x$n_probs_0_1 == 0) {
     message_list[[1]] <- 
-      data.frame(ok = TRUE, message = "No fit had 0/1 probabilities.")
+      data.frame(ok = TRUE, message = "No fit had 0/1 probabilities.")
   } else {
     message_list[[1]] <- 
-      data.frame(ok = FALSE, 
-                 message = paste0(
-                   x$n_probs_0_1, " (", round(100 * x$n_probs_0_1 / x$n_fits), 
+      data.frame(ok = FALSE, 
+                 message = paste0(
+                   x$n_probs_0_1, " (", round(100 * x$n_probs_0_1 / x$n_fits), 
                    "%) of fits had 0/1 probabilities."))    
   }
   
   if(x$n_not_converged == 0) {
     message_list[[2]] <- 
-      data.frame(ok = TRUE, message = "All fits converged.")
+      data.frame(ok = TRUE, message = "All fits converged.")
   } else {
     message_list[[2]] <- 
-      data.frame(ok = FALSE, 
-                 message = paste0(
-                   x$n_not_converged, " (", round(100 * x$n_not_converged / x$n_fits), 
+      data.frame(ok = FALSE, 
+                 message = paste0(
+                   x$n_not_converged, " (", round(100 * x$n_not_converged / x$n_fits), 
                    "%) of fits did not converge."))    
   }  
   
-  SBC_diagnostic_messages(do.call(rbind, message_list))
+  SBC_diagnostic_messages(do.call(rbind, message_list))
 }

Finally, some backend objects are complex and may differ between R sessions, even if they represent the same backend (Stan backends are a prime example as the pointer/path to a compiled model can change). If that’s the case, it may break the built-in caching functionality. Such backends may want to implement the SBC_backend_hash_for_cache() generic to provide a hash with better properties.
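For illustration, such an implementation could look roughly like this (a sketch - the class name and the model_code/args fields are hypothetical, and digest::digest() is used just as an example of a stable hashing helper):

SBC_backend_hash_for_cache.SBC_backend_my_stan <- function(backend) {
  # Hash only the parts that are stable across R sessions (the model code
  # and the arguments), not e.g. the pointer to the compiled model.
  digest::digest(list(model_code = backend$model_code, args = backend$args))
}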

And that’s about all that we can do for our backend. Before we use the extended backend for some investigations, we’ll make an aside about parallel support.

-
-

-Considerations for parallelization

-

SBC uses the future package to allow paralellization. This means that when user sets up a parallel environment (e.g. via plan(multisession)), the SBC_fit(), SBC_fit_to_draws_matrix() and SBC_backend_iid_samples() implementations will run in a fresh session. To make this work smoothly, the functions should call non-base R functions explicitly via namespace declaration (e.g. note that we call MASS::mvrnorm, not just mvrnorm).

-

If you are implementing the backend to become part of the SBC package, nothing more is needed for paralellization to work. If however you are just building an ad-hoc backend that lives in your global environment, you will also need to pass the three functions to the globals argument of compute_results which will make them available on all workers i.e. use:

-
compute_results(..., globals = c("SBC_fit.SBC_backend_glm", 
-                                 "SBC_fit_to_draws_matrix.glm",
-                                 "SBC_backend_iid_samples.SBC_backend_glm"))
-
+
+

Considerations for parallelization +

+

SBC uses the future package to allow parallelization. This means that when the user sets up a parallel environment (e.g. via plan(multisession)), the SBC_fit(), SBC_fit_to_draws_matrix() and SBC_backend_iid_draws() implementations will run in a fresh session. To make this work smoothly, the functions should call non-base R functions explicitly with namespace qualification (e.g. note that we call MASS::mvrnorm, not just mvrnorm).

+

If you are implementing the backend to become part of the SBC package, nothing more is needed for parallelization to work. If, however, you are just building an ad-hoc backend that lives in your global environment, you will also need to pass the three functions to the globals argument of compute_SBC, which will make them available on all workers, i.e. use:

+
compute_SBC(..., globals = c("SBC_fit.SBC_backend_glm", 
+                                 "SBC_fit_to_draws_matrix.glm",
+                                 "SBC_backend_iid_draws.SBC_backend_glm"))

If those functions in turn depend on other functions not defined in a package, those functions would need to be added to globals as well. In some future version of the package we hope to be able to autodetect those dependencies.
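For example, if SBC_fit_to_draws_matrix.glm internally called a helper my_extract_coefs() defined only in the global environment (a hypothetical name used purely for illustration), the call would need to become:

compute_SBC(..., globals = c("SBC_fit.SBC_backend_glm",
                             "SBC_fit_to_draws_matrix.glm",
                             "SBC_backend_iid_draws.SBC_backend_glm",
                             "my_extract_coefs"))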

Also note that if you are OK with single-core processing (which with glm is very fast), you don’t need to care about any of this.

-
-

-glm as approximate Bayesian method

+
+

+glm as approximate Bayesian method +

As we discussed earlier, we can view glm as an approximation to a fully Bayesian method where the posterior is approximated by a multivariate normal and the priors are flat. We would therefore expect the approximation to be well-behaved when we use wide priors for simulation and/or have a lot of data to inform all the model coefficients. The obvious way to verify whether the approximation is good is to run both full Bayesian inference (e.g. via rstanarm) and glm on the same data. The problem is that this reduces the appeal of the approximation, as we need to wait for the fully Bayesian fit anyway and so we might as well just use the Bayesian version.

-

However, SBC allows another way - we can check that the approximation is good by running only the approximate method (a lot of times) and look at SBC results. This may still be faster than running a single fully Bayesian fit. Additionally, fitting with an approximate algorithm can be useful to run approximate power calculations where it lets us cheaply fit a lot of simulated datasets to e.g. understand how the width of our posterior intervals changes with sample size and at the same time we learn, whether the approximation is problematic in some way.

-

For the sake of example, let’s assume we’ve already gathered a dataset that we want to analyze with Bayesian logistic regression. So our data generating process will use the observed covariate values but simulate new coefficients and outcome data. Below is a simple implementation with normal priors on the intercept and predictors. Note that we do some rejection sampling here to avoid using datasets where the generated response is the same or almost the same for all rows.

+

However, SBC allows another way - we can check that the approximation is good by running only the approximate method (a lot of times) and looking at the SBC results. This may still be faster than running a single fully Bayesian fit. Additionally, fitting with an approximate algorithm can be useful for running approximate power calculations, where it lets us cheaply fit a lot of simulations to e.g. understand how the width of our posterior intervals changes with sample size, and at the same time we learn whether the approximation is problematic in some way.

+

For the sake of example, let’s assume we’ve already gathered data that we want to analyze with Bayesian logistic regression. So our data generating process will use the observed covariate values but simulate new coefficients and outcome data. Below is a simple implementation with normal priors on the intercept and predictors. Note that we do some rejection sampling here to avoid using simulations where the generated response is the same or almost the same for all rows.

 generator_single_logistic <- function(formula, 
-                                      dataset, 
+                                      template_data, 
                                       intercept_prior_loc = 0,
                                       intercept_prior_width = 2,
                                       predictor_prior_loc = 0,
                                       predictor_prior_width = 1) {
-  response_name <- formula.tools::lhs.vars(formula)
-  if(length(response_name) != 1) {
-    stop("The formula has to have just a single response")
+  response_name <- formula.tools::lhs.vars(formula)
+  if(length(response_name) != 1) {
+    stop("The formula has to have just a single response")
   }
   
-  X <- model.matrix(formula, dataset)
+  X <- model.matrix(formula, template_data)
   
   repeat {
-    coefs <- rnorm(ncol(X), predictor_prior_loc, sd = predictor_prior_width)
-    names(coefs) <- colnames(X)
-    if("(Intercept)" %in% names(coefs)) {
-      coefs["(Intercept)"] <- rnorm(1, intercept_prior_loc, sd = intercept_prior_width)
+    coefs <- rnorm(ncol(X), predictor_prior_loc, sd = predictor_prior_width)
+    names(coefs) <- colnames(X)
+    if("(Intercept)" %in% names(coefs)) {
+      coefs["(Intercept)"] <- rnorm(1, intercept_prior_loc, sd = intercept_prior_width)
     }
     
-    linpred <- X %*% coefs
-    probs <- plogis(linpred)
-    y <- rbinom(nrow(X), size = 1, prob = probs)
-    if(sum(y == 0) >= 5 && sum(y == 1) >= 5) {
+    linpred <- X %*% coefs
+    probs <- plogis(linpred)
+    y <- rbinom(nrow(X), size = 1, prob = probs)
+    if(sum(y == 0) >= 5 && sum(y == 1) >= 5) {
       break;
     }
   }
 
-  dataset_mod <- dataset
-  dataset_mod[[response_name]] <- y
+  data_mod <- template_data
+  data_mod[[response_name]] <- y
 
-  list(
-    parameters = as.list(coefs),
-    generated = dataset_mod
+  list(
+    variables = as.list(coefs),
+    generated = data_mod
   ) 
 }
-

We are going to use the indo_rct dataset from the medicaldata package. The dataset contains the results of a randomized, placebo-controlled, prospective 2-arm trial of indomethacin 100 mg PR once vs. placebo to prevent post-ERCP Pancreatitis in 602 patients. You can inspect the codebook as well as the published paper online. The citation for the paper is:

-

Elmunzer, Higgins, et al., A Randomized Trial of Rectal Indomethacin to Prevent Post-ERCP Pancreatitis, New England Journal of Medicine, 2012, volume 366, pages 1414-1422, as found here.

-
-

-Well-informed model

+

We are going to use the indo_rct dataset from the medicaldata package as a template. The dataset contains the results of a randomized, placebo-controlled, prospective 2-arm trial of indomethacin 100 mg PR once vs. placebo to prevent post-ERCP Pancreatitis in 602 patients. You can inspect the codebook as well as the published paper online. The citation for the paper is:

+

Elmunzer, Higgins, et al., A Randomized Trial of Rectal Indomethacin to Prevent Post-ERCP Pancreatitis, New England Journal of Medicine, 2012, volume 366, pages 1414-1422, as found here.

+
+

Well-informed model +

We’ll start by testing our approximate computation with the simplest analysis - the primary binary outcome predicted only by the treatment (the rx column in the data):

 formula_indo_simple <- outcome ~ rx
-set.seed(6524243)
+set.seed(6524243)
 datasets_indo_simple <- generate_datasets(SBC_generator_function(
   generator_single_logistic, 
   formula = formula_indo_simple,
-  dataset = medicaldata::indo_rct),
-  n_datasets = 500) 
+  template_data = medicaldata::indo_rct),
+  n_sims = 500) 
 
 backend_indo_simple <- SBC_backend_glm(formula = formula_indo_simple, family = "binomial") 
-res_indo_simple <- compute_results(datasets_indo_simple, backend_indo_simple,
+res_indo_simple <- compute_SBC(datasets_indo_simple, backend_indo_simple,
                                    cache_mode = "results", 
-                                   cache_location = file.path(cache_dir,"indo_simple"))
-
## Results loaded from cache file 'indo_simple'
+ cache_location = file.path(cache_dir,"indo_simple"))
+
## Results loaded from cache file 'indo_simple'

The rank plots look good:

 plot_rank_hist(res_indo_simple)

-plot_ecdf_diff(res_indo_simple)
+plot_ecdf_diff(res_indo_simple)

The coverages are very tight

@@ -405,156 +410,156 @@ 

We can make this precise by inspecting the same results numerically:

-stats_effect <- res_indo_simple$stats[res_indo_simple$stats$parameter == "rx1_indomethacin",]
-main_eff_coverage <- empirical_coverage(stats_effect, width = c(0.5,0.9, 0.95))
+stats_effect <- res_indo_simple$stats[res_indo_simple$stats$variable == "rx1_indomethacin",]
+main_eff_coverage <- empirical_coverage(stats_effect, width = c(0.5,0.9, 0.95))
 main_eff_coverage
-
## # A tibble: 3 x 6
-##   parameter        width width_represented ci_low estimate ci_high
-##   <chr>            <dbl>             <dbl>  <dbl>    <dbl>   <dbl>
-## 1 rx1_indomethacin  0.5              0.500  0.482    0.526   0.569
-## 2 rx1_indomethacin  0.9              0.900  0.871    0.9     0.923
-## 3 rx1_indomethacin  0.95             0.950  0.930    0.952   0.967
+
## # A tibble: 3 x 6
+##   variable         width width_represented ci_low estimate ci_high
+##   <chr>            <dbl>             <dbl>  <dbl>    <dbl>   <dbl>
+## 1 rx1_indomethacin  0.5               0.5   0.486    0.53    0.573
+## 2 rx1_indomethacin  0.9               0.9   0.868    0.898   0.922
+## 3 rx1_indomethacin  0.95              0.95  0.930    0.952   0.967

So we would expect e.g. the 95% CI for the main effect to correspond to roughly a 93% - 97% credible interval of a fully Bayesian treatment - using more simulations would let us make this more precise, but for now we are happy that there is clearly no substantial discrepancy.

We may also want to look at how tight the estimates we get are - looking at them directly gives us a somewhat unhelpful plot:

 plot_sim_estimated(res_indo_simple)

-

There is a simulation where the posterior uncertainty is very large. This corresponds to dataset where the outcome is the same for all rows where the treatment was used:

+

There is a simulation where the posterior uncertainty is very large. This corresponds to observed data where the outcome is the same for all rows where the treatment was used:

-biggest_sd_dataset <- res_indo_simple$stats$dataset_id[
-  which.max(res_indo_simple$stats$sd)]
-table(datasets_indo_simple$generated[[biggest_sd_dataset]][c("outcome", "rx")])
-
##        rx
-## outcome 0_placebo 1_indomethacin
-##       0        26              0
-##       1       281            295
-

Filtering the extreme datasets out, we see that most commonly, we get a decently precise estimate.

+biggest_sd_sim <- res_indo_simple$stats$sim_id[
+  which.max(res_indo_simple$stats$sd)]
+table(datasets_indo_simple$generated[[biggest_sd_sim]][c("outcome", "rx")])

+
##        rx
+## outcome 0_placebo 1_indomethacin
+##       0        26              0
+##       1       281            295
+

Filtering the extreme simulations out, we see that most commonly, we get a decently precise estimate.

 stats_filtered <- res_indo_simple$stats[res_indo_simple$stats$sd < 200, ]
 plot_sim_estimated(stats_filtered, alpha = 0.3)

-
-

-Badly-informed model

+
+

Badly-informed model +

But the simple approximation does not work every time. Let’s say we want to increase the precision of our main effect estimate by adjusting for some factors that we believe are associated with the outcome: the study site, gender and age of the patients. Since one site has only 3 patients, we’ll remove it from the simulations. Additionally, we’ll standardize the age to be centered at 50 and divided by 10 to make the N(0,1) prior on the age coefficient have a sensible scale. To make matters worse, we further subsample the data to contain only 100 rows.

-set.seed(21645222)
+set.seed(21645222)
 
-indo_rct_complex <- droplevels(
-  medicaldata::indo_rct[medicaldata::indo_rct$site != "4_Case",])
-rows_to_keep <- sample(1:nrow(indo_rct_complex), size = 100)
+indo_rct_complex <- droplevels(
+  medicaldata::indo_rct[medicaldata::indo_rct$site != "4_Case",])
+rows_to_keep <- sample(1:nrow(indo_rct_complex), size = 100)
 indo_rct_complex <- indo_rct_complex[rows_to_keep,]
 indo_rct_complex$age <- (indo_rct_complex$age - 50) / 10
 indo_rct_complex$risk <- indo_rct_complex$risk - 3
-indo_rct_complex$type <- factor(indo_rct_complex$type, ordered = TRUE)
+indo_rct_complex$type <- factor(indo_rct_complex$type, ordered = TRUE)
 
 formula_indo_complex <- outcome ~ rx + site + gender + age + risk
 datasets_indo_complex <- generate_datasets(SBC_generator_function(
   generator_single_logistic, 
   formula = formula_indo_complex,
-  dataset = indo_rct_complex),
-  n_datasets = 500) 
+  template_data = indo_rct_complex),
+  n_sims = 500) 
 
 backend_indo_complex <- SBC_backend_glm(formula = formula_indo_complex, family = "binomial") 

Now we are ready to run SBC:

-res_indo_complex <- compute_results(datasets_indo_complex, backend_indo_complex,
+res_indo_complex <- compute_SBC(datasets_indo_complex, backend_indo_complex,
                                    cache_mode = "results", 
-                                   cache_location = file.path(cache_dir,"indo_complex"))
-
## Results loaded from cache file 'indo_complex'
-
##  - 19 (4%) of fits had 0/1 probabilities.
-
##  - 2 (0%) of fits did not converge.
-
## Not all diagnostics are OK.
-## You can learn more by inspecting $default_diagnostics, $backend_diagnostics 
-## and/or investigating $outputs/$messages/$warnings for detailed output from the backend.
+ cache_location = file.path(cache_dir,"indo_complex"))
+
## Results loaded from cache file 'indo_complex'
+
##  - 19 (4%) of fits had 0/1 probabilities.
+
##  - 2 (0%) of fits did not converge.
+
## Not all diagnostics are OK.
+## You can learn more by inspecting $default_diagnostics, $backend_diagnostics 
+## and/or investigating $outputs/$messages/$warnings for detailed output from the backend.

We see that the work we’ve done for diagnostics previously paid off, as we are notified of some problems.

Additionally, the rank plots show some miscalibration, most pronounced for the site3_UK coefficient:

 plot_rank_hist(res_indo_complex)

-plot_ecdf_diff(res_indo_complex)
+plot_ecdf_diff(res_indo_complex)

What happens is that many of the simulations result in extremely wide uncertainties around some of the coefficients, making the modest simulated values fall almost exactly in the middle - this then results in the overabundance of ranks around the midpoint. A fully Bayesian fit would regularize the uncertainties and avoid this problem.
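The mechanism can be illustrated with a tiny standalone simulation (not part of the original analysis; the numbers are arbitrary): when the approximate posterior is enormously wide but roughly centered on the true value, the rank of the true value concentrates near the middle instead of being uniform.

set.seed(1)
ranks <- replicate(1000, {
  true_value <- rnorm(1)                  # a modest simulated value
  # wildly over-dispersed "posterior" draws, for simplicity centered
  # exactly at the true value
  draws <- rnorm(1000, mean = true_value, sd = 500)
  sum(draws < true_value)                 # rank of the true value
})
hist(ranks, breaks = 20)  # piles up around 500, i.e. the midpoint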

The main effect of interest (rx1_indomethacin) is, however, still reasonably well calibrated:

-stats_effect <- res_indo_complex$stats[res_indo_complex$stats$parameter == "rx1_indomethacin",]
-main_eff_coverage <- empirical_coverage(stats_effect, width = c(0.5,0.9, 0.95))
+stats_effect <- res_indo_complex$stats[res_indo_complex$stats$variable == "rx1_indomethacin",]
+main_eff_coverage <- empirical_coverage(stats_effect, width = c(0.5,0.9, 0.95))
 main_eff_coverage
-
## # A tibble: 3 x 6
-##   parameter        width width_represented ci_low estimate ci_high
-##   <chr>            <dbl>             <dbl>  <dbl>    <dbl>   <dbl>
-## 1 rx1_indomethacin  0.5              0.500  0.438    0.482   0.526
-## 2 rx1_indomethacin  0.9              0.900  0.868    0.898   0.922
-## 3 rx1_indomethacin  0.95             0.950  0.930    0.952   0.967
+
## # A tibble: 3 x 6
+##   variable         width width_represented ci_low estimate ci_high
+##   <chr>            <dbl>             <dbl>  <dbl>    <dbl>   <dbl>
+## 1 rx1_indomethacin  0.5               0.5   0.438    0.482   0.526
+## 2 rx1_indomethacin  0.9               0.9   0.868    0.898   0.922
+## 3 rx1_indomethacin  0.95              0.95  0.930    0.952   0.967
-
-

-Badly-informed model, narrow priors

+
+

Badly-informed model, narrow priors +

The above case aimed at making the normal approximation to the posterior problematic. We can obviously make things worse by also introducing a strong prior concentrating away from zero, which we’ll do here:

-set.seed(1685554)
+set.seed(1685554)
 datasets_indo_complex_narrow <- generate_datasets(SBC_generator_function(
   generator_single_logistic, 
   formula = formula_indo_complex,
-  dataset = indo_rct_complex,
+  template_data = indo_rct_complex,
   intercept_prior_loc = 3,
   intercept_prior_width = 0.5,
-  predictor_prior_loc = c(-2, 2),
+  predictor_prior_loc = c(-2, 2),
   predictor_prior_width = 0.5),
-  n_datasets = 500) 
+ n_sims = 500)
-res_indo_complex_narrow <- compute_results(datasets_indo_complex_narrow, backend_indo_complex,
+res_indo_complex_narrow <- compute_SBC(datasets_indo_complex_narrow, backend_indo_complex,
                                    cache_mode = "results", 
-                                   cache_location = file.path(cache_dir,"indo_complex_narrow"))
-
## Results loaded from cache file 'indo_complex_narrow'
-
##  - 169 (34%) of fits had 0/1 probabilities.
-
##  - 2 (0%) of fits did not converge.
-
## Not all diagnostics are OK.
-## You can learn more by inspecting $default_diagnostics, $backend_diagnostics 
-## and/or investigating $outputs/$messages/$warnings for detailed output from the backend.
-

This is enough to make basically all the parameters poorly calibrated:

+ cache_location = file.path(cache_dir,"indo_complex_narrow"))
+
## Results loaded from cache file 'indo_complex_narrow'
+
##  - 169 (34%) of fits had 0/1 probabilities.
+
##  - 2 (0%) of fits did not converge.
+
## Not all diagnostics are OK.
+## You can learn more by inspecting $default_diagnostics, $backend_diagnostics 
+## and/or investigating $outputs/$messages/$warnings for detailed output from the backend.
+

This is enough to make basically all the variables poorly calibrated:

 plot_rank_hist(res_indo_complex_narrow)

-plot_ecdf_diff(res_indo_complex_narrow)
+plot_ecdf_diff(res_indo_complex_narrow)

-
-

-Well-informed model, narrow priors

+
+

Well-informed model, narrow priors +

To make the analysis complete, we’ll also return to the simple, well-informed model, but use narrow priors.

-set.seed(3289542)
+set.seed(3289542)
 datasets_indo_simple_narrow <- generate_datasets(SBC_generator_function(
   generator_single_logistic, 
   formula = formula_indo_simple,
-  dataset = medicaldata::indo_rct,
+  template_data = medicaldata::indo_rct,
   intercept_prior_loc = 3,
   intercept_prior_width = 0.5,
-  predictor_prior_loc = c(-2, 2),
+  predictor_prior_loc = c(-2, 2),
   predictor_prior_width = 0.5),
-  n_datasets = 500) 
+ n_sims = 500)
-res_indo_simple_narrow <- compute_results(datasets_indo_simple_narrow, backend_indo_simple,
+res_indo_simple_narrow <- compute_SBC(datasets_indo_simple_narrow, backend_indo_simple,
                                    cache_mode = "results", 
-                                   cache_location = file.path(cache_dir,"indo_simple_narrow"))
-
## Results loaded from cache file 'indo_simple_narrow'
+ cache_location = file.path(cache_dir,"indo_simple_narrow"))
+
## Results loaded from cache file 'indo_simple_narrow'

It turns out that in this case, the likelihood is sometimes not enough to completely overwhelm the prior, and the main treatment effect is poorly calibrated:

 plot_rank_hist(res_indo_simple_narrow)

-plot_ecdf_diff(res_indo_simple_narrow)
+plot_ecdf_diff(res_indo_simple_narrow)

-
-

-Conclusions

-

Implementing a minimal support for SBC using an additional algorithm requires writing three relatively simple functions. If you happen to wrap an algorithm, we’d be happy to accept a pull request with your backend at https://github.com/hyunjimoon/SBC/.

+
+

Conclusions +

+

Implementing minimal support for SBC for an additional algorithm requires writing three relatively simple functions. If you happen to wrap an algorithm, we’d be happy to accept a pull request with your backend at https://github.com/hyunjimoon/SBC/.

We’ve also shown that in some cases, glm can actually be a pretty good approximation to a fully Bayesian treatment of an equivalent model. Unfortunately, the approximation does not always work so well - but SBC can tell us when it works and when it fails.

@@ -570,11 +575,13 @@

-

Site built with pkgdown 1.6.1.

+

+

Site built with pkgdown 2.0.1.

@@ -583,5 +590,7 @@

+ + diff --git a/docs/articles/implementing_backends_files/figure-html/indo_complex_narrow_ranks-1.png b/docs/articles/implementing_backends_files/figure-html/indo_complex_narrow_ranks-1.png index 490ec93..f31616b 100644 Binary files a/docs/articles/implementing_backends_files/figure-html/indo_complex_narrow_ranks-1.png and b/docs/articles/implementing_backends_files/figure-html/indo_complex_narrow_ranks-1.png differ diff --git a/docs/articles/implementing_backends_files/figure-html/indo_complex_narrow_ranks-2.png b/docs/articles/implementing_backends_files/figure-html/indo_complex_narrow_ranks-2.png index d8cc9c7..24869e9 100644 Binary files a/docs/articles/implementing_backends_files/figure-html/indo_complex_narrow_ranks-2.png and b/docs/articles/implementing_backends_files/figure-html/indo_complex_narrow_ranks-2.png differ diff --git a/docs/articles/implementing_backends_files/figure-html/indo_complex_ranks-1.png b/docs/articles/implementing_backends_files/figure-html/indo_complex_ranks-1.png index 742a766..f8a973d 100644 Binary files a/docs/articles/implementing_backends_files/figure-html/indo_complex_ranks-1.png and b/docs/articles/implementing_backends_files/figure-html/indo_complex_ranks-1.png differ diff --git a/docs/articles/implementing_backends_files/figure-html/indo_complex_ranks-2.png b/docs/articles/implementing_backends_files/figure-html/indo_complex_ranks-2.png index fabe51b..0b5d633 100644 Binary files a/docs/articles/implementing_backends_files/figure-html/indo_complex_ranks-2.png and b/docs/articles/implementing_backends_files/figure-html/indo_complex_ranks-2.png differ diff --git a/docs/articles/implementing_backends_files/figure-html/indo_sim_coverage-1.png b/docs/articles/implementing_backends_files/figure-html/indo_sim_coverage-1.png index 9d8536c..f3b4e02 100644 Binary files a/docs/articles/implementing_backends_files/figure-html/indo_sim_coverage-1.png and b/docs/articles/implementing_backends_files/figure-html/indo_sim_coverage-1.png differ diff --git a/docs/articles/implementing_backends_files/figure-html/indo_simple_narrow_ranks-1.png b/docs/articles/implementing_backends_files/figure-html/indo_simple_narrow_ranks-1.png index ccc0952..91a31dd 100644 Binary files a/docs/articles/implementing_backends_files/figure-html/indo_simple_narrow_ranks-1.png and b/docs/articles/implementing_backends_files/figure-html/indo_simple_narrow_ranks-1.png differ diff --git a/docs/articles/implementing_backends_files/figure-html/indo_simple_narrow_ranks-2.png b/docs/articles/implementing_backends_files/figure-html/indo_simple_narrow_ranks-2.png index db736e7..dfa28d1 100644 Binary files a/docs/articles/implementing_backends_files/figure-html/indo_simple_narrow_ranks-2.png and b/docs/articles/implementing_backends_files/figure-html/indo_simple_narrow_ranks-2.png differ diff --git a/docs/articles/implementing_backends_files/figure-html/indo_simple_ranks-1.png b/docs/articles/implementing_backends_files/figure-html/indo_simple_ranks-1.png index 2566050..1a26446 100644 Binary files a/docs/articles/implementing_backends_files/figure-html/indo_simple_ranks-1.png and b/docs/articles/implementing_backends_files/figure-html/indo_simple_ranks-1.png differ diff --git a/docs/articles/implementing_backends_files/figure-html/indo_simple_ranks-2.png b/docs/articles/implementing_backends_files/figure-html/indo_simple_ranks-2.png index c8b0bac..69d3bbd 100644 Binary files a/docs/articles/implementing_backends_files/figure-html/indo_simple_ranks-2.png and 
b/docs/articles/implementing_backends_files/figure-html/indo_simple_ranks-2.png differ diff --git a/docs/articles/implementing_backends_files/figure-html/indo_simple_sim_est-1.png b/docs/articles/implementing_backends_files/figure-html/indo_simple_sim_est-1.png index 4d1ba63..8379c0f 100644 Binary files a/docs/articles/implementing_backends_files/figure-html/indo_simple_sim_est-1.png and b/docs/articles/implementing_backends_files/figure-html/indo_simple_sim_est-1.png differ diff --git a/docs/articles/implementing_backends_files/figure-html/indo_simple_sim_est_ugly-1.png b/docs/articles/implementing_backends_files/figure-html/indo_simple_sim_est_ugly-1.png index c9ce756..789a65d 100644 Binary files a/docs/articles/implementing_backends_files/figure-html/indo_simple_sim_est_ugly-1.png and b/docs/articles/implementing_backends_files/figure-html/indo_simple_sim_est_ugly-1.png differ diff --git a/docs/articles/implementing_backends_files/figure-html/poisson_ranks-1.png b/docs/articles/implementing_backends_files/figure-html/poisson_ranks-1.png index 3459aea..9bca66a 100644 Binary files a/docs/articles/implementing_backends_files/figure-html/poisson_ranks-1.png and b/docs/articles/implementing_backends_files/figure-html/poisson_ranks-1.png differ diff --git a/docs/articles/implementing_backends_files/figure-html/poisson_ranks-2.png b/docs/articles/implementing_backends_files/figure-html/poisson_ranks-2.png index 86f3a44..9b956b4 100644 Binary files a/docs/articles/implementing_backends_files/figure-html/poisson_ranks-2.png and b/docs/articles/implementing_backends_files/figure-html/poisson_ranks-2.png differ diff --git a/docs/articles/implementing_backends_files/figure-html/poisson_sim_estimated-1.png b/docs/articles/implementing_backends_files/figure-html/poisson_sim_estimated-1.png index af89262..37e39fa 100644 Binary files a/docs/articles/implementing_backends_files/figure-html/poisson_sim_estimated-1.png and b/docs/articles/implementing_backends_files/figure-html/poisson_sim_estimated-1.png differ diff --git a/docs/articles/implementing_backends_files/header-attrs-2.11/header-attrs.js b/docs/articles/implementing_backends_files/header-attrs-2.11/header-attrs.js deleted file mode 100644 index dd57d92..0000000 --- a/docs/articles/implementing_backends_files/header-attrs-2.11/header-attrs.js +++ /dev/null @@ -1,12 +0,0 @@ -// Pandoc 2.9 adds attributes on both header and div. We remove the former (to -// be compatible with the behavior of Pandoc < 2.8). -document.addEventListener('DOMContentLoaded', function(e) { - var hs = document.querySelectorAll("div.section[class*='level'] > :first-child"); - var i, h, a; - for (i = 0; i < hs.length; i++) { - h = hs[i]; - if (!/^h[1-6]$/i.test(h.tagName)) continue; // it should be a header h1-h6 - a = h.attributes; - while (a.length > 0) h.removeAttribute(a[0].name); - } -}); diff --git a/docs/articles/index.html b/docs/articles/index.html index d73e09e..46e2420 100644 --- a/docs/articles/index.html +++ b/docs/articles/index.html @@ -1,66 +1,12 @@ - - - - - - - -Articles • SBC - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Articles • SBC - + + - - - -
-
- -
- -
+

Using SBC for debugging/validating Stan models

-

Case studies showing how problems in a Stan model can be discovered with SBC.

- -
-
Discovering bad parametrization with SBC
-
-
Discovering indexing errors with SBC
-
-
Small model implementation workflow
-
-
-
+

Case studies showing how problems in a Stan model can be discovered with SBC.

+ +
Discovering bad parametrization with SBC
+
+
Discovering indexing errors with SBC
+
+
Small model implementation workflow
+
+
+
Limits of SBC
+
+
SBC for ADVI and optimizing in Stan (+HMMs)
+
+
Implementing a new backend (algorithm) + frequentist approximations
+
+
SBC for brms models
+
+
SBC with discrete parameters in Stan and JAGS
+
+
Rejection sampling in simulations
+
+
-
- +

- - + + diff --git a/docs/articles/indexing.html b/docs/articles/indexing.html index 61265d6..7340d8c 100644 --- a/docs/articles/indexing.html +++ b/docs/articles/indexing.html @@ -19,6 +19,8 @@ + +
+
-

This vignette provides the example used as Exercise 3 of the SBC tutorial presented at SBC StanConnect. Feel free to head to the tutorial website to get an interactive version and solve the problems yourself.

+

This vignette provides the example used as Exercise 3 of the SBC tutorial presented at SBC StanConnect. Feel free to head to the tutorial website to get an interactive version and solve the problems yourself.

Let’s set up the environment.

-library(SBC); 
-library(ggplot2)
+library(SBC); 
+library(ggplot2)
 
-use_cmdstanr <- TRUE # Set to false to use rstan instead
+use_cmdstanr <- getOption("SBC.vignettes_cmdstanr", TRUE) # Set to false to use rstan instead
 
 if(use_cmdstanr) {
-  library(cmdstanr)
+  library(cmdstanr)
 } else {
-  library(rstan)
+  library(rstan)
+  rstan_options(auto_write = TRUE)
 }
 
 # Setup caching of results
-cache_dir <- "./indexing_SBC_cache"
-if(!dir.exists(cache_dir)) {
-  dir.create(cache_dir)
+if(use_cmdstanr) {
+  cache_dir <- "./_indexing_SBC_cache"
+} else {
+  cache_dir <- "./_indexing_rstan_SBC_cache"
+}
+if(!dir.exists(cache_dir)) {
+  dir.create(cache_dir)
 }

Below are three different Stan programs implementing a simple linear regression. Not all of them are correct - can you see which ones are wrong?

-cat(readLines("stan/regression1.stan"), sep = "\n")
+cat(readLines("stan/regression1.stan"), sep = "\n")
data {
   int<lower=0> N;   // number of data items
   int<lower=0> K;   // number of predictors
@@ -176,7 +186,7 @@ 

2021-09-26

sigma ~ normal(0, 2); }
-cat(readLines("stan/regression2.stan"), sep = "\n")
+cat(readLines("stan/regression2.stan"), sep = "\n")
data {
   int<lower=0> N;   // number of data items
   int<lower=0> K;   // number of predictors
@@ -202,7 +212,7 @@ 

2021-09-26

sigma ~ normal(0, 2); }
-cat(readLines("stan/regression3.stan"), sep = "\n")
+cat(readLines("stan/regression3.stan"), sep = "\n")
data {
   int<lower=0> N;   // number of data items
   int<lower=0> K;   // number of predictors
@@ -224,17 +234,17 @@ 

2021-09-26

First we’ll build backends using the individual models.

 if(use_cmdstanr) {
-  model_regression_1 <- cmdstan_model("stan/regression1.stan")
-  model_regression_2 <- cmdstan_model("stan/regression2.stan")
-  model_regression_3 <- cmdstan_model("stan/regression3.stan")
+  model_regression_1 <- cmdstan_model("stan/regression1.stan")
+  model_regression_2 <- cmdstan_model("stan/regression2.stan")
+  model_regression_3 <- cmdstan_model("stan/regression3.stan")
 
   backend_regression_1 <- SBC_backend_cmdstan_sample(model_regression_1, iter_warmup = 400, iter_sampling = 500)
   backend_regression_2 <- SBC_backend_cmdstan_sample(model_regression_2, iter_warmup = 400, iter_sampling = 500)
   backend_regression_3 <- SBC_backend_cmdstan_sample(model_regression_3, iter_warmup = 400, iter_sampling = 500)
 } else {
-  model_regression_1 <- stan_model("stan/regression1.stan")
-  model_regression_2 <- stan_model("stan/regression2.stan")
-  model_regression_3 <- stan_model("stan/regression3.stan")
+  model_regression_1 <- stan_model("stan/regression1.stan")
+  model_regression_2 <- stan_model("stan/regression2.stan")
+  model_regression_3 <- stan_model("stan/regression3.stan")
 
   
   backend_regression_1 <- SBC_backend_rstan_sample(model_regression_1, iter = 900, warmup = 400)
@@ -243,85 +253,86 @@ 

2021-09-26

}

Then we’ll write a function that generates data. We write it in the simplest way possible to reduce the chance of making an error. We also don’t really need to worry about performance here.

-single_dataset_regression <- function(N, K) {
-  x <- matrix(rnorm(n = N * K, mean = 0, sd = 1), nrow = N, ncol = K)
-  alpha <- rnorm(n = 1, mean = 0, sd = 1)
-  beta <- rnorm(n = K, mean = 0, sd = 1)
-  sigma <- abs(rnorm(n = 1, mean = 0, sd = 2))
+single_sim_regression <- function(N, K) {
+  x <- matrix(rnorm(n = N * K, mean = 0, sd = 1), nrow = N, ncol = K)
+  alpha <- rnorm(n = 1, mean = 0, sd = 1)
+  beta <- rnorm(n = K, mean = 0, sd = 1)
+  sigma <- abs(rnorm(n = 1, mean = 0, sd = 2))
   
-  y <- array(NA_real_, N)
+  y <- array(NA_real_, N)
   for(n in 1:N) {
     mu <- alpha
     for(k in 1:K) {
       mu <- mu + x[n,k] * beta[k]
     }
-    y[n] <- rnorm(n = 1, mean = mu, sd = sigma) 
+    y[n] <- rnorm(n = 1, mean = mu, sd = sigma) 
   }
   
-  list(
-    parameters = list(
+  list(
+    variables = list(
       alpha = alpha,
       beta = beta,
       sigma = sigma),
-    generated = list(
+    generated = list(
       N = N, K = K,
       x = x, y = y
     )
   )
   
 }
-

We’ll start with just 10 datasets to get a quick computation - this will still let us see big problems (but not subtle issues)

+

We’ll start with just 10 simulations to get a quick computation - this will still let us see big problems (but not subtle issues).

-set.seed(5666024)
+set.seed(5666024)
 datasets_regression <- generate_datasets(
-  SBC_generator_function(single_dataset_regression, N = 100, K = 2), 10)
+ SBC_generator_function(single_sim_regression, N = 100, K = 2), 10)

Now we can use all of the backends to fit the generated datasets.

-results_regression_1 <- compute_results(datasets_regression, backend_regression_1, 
+results_regression_1 <- compute_SBC(datasets_regression, backend_regression_1, 
                     cache_mode = "results", 
-                    cache_location = file.path(cache_dir, "regression1"))
-
## Results loaded from cache file 'regression1'
-
##  - 6 (60%) fits had some steps rejected. Maximum number of rejections was 4.
-
## Not all diagnostics are OK.
-## You can learn more by inspecting $default_diagnostics, $backend_diagnostics 
-## and/or investigating $outputs/$messages/$warnings for detailed output from the backend.
+ cache_location = file.path(cache_dir, "regression1"))
+
## Results loaded from cache file 'regression1'
+
##  - 7 (70%) fits had some steps rejected. Maximum number of rejections was 4.
+
## Not all diagnostics are OK.
+## You can learn more by inspecting $default_diagnostics, $backend_diagnostics 
+## and/or investigating $outputs/$messages/$warnings for detailed output from the backend.
-results_regression_2 <- compute_results(datasets_regression, backend_regression_2, 
+results_regression_2 <- compute_SBC(datasets_regression, backend_regression_2, 
                     cache_mode = "results", 
-                    cache_location = file.path(cache_dir, "regression2"))
-
## Results loaded from cache file 'regression2'
-
##  - 6 (60%) fits had some steps rejected. Maximum number of rejections was 4.
-
## Not all diagnostics are OK.
-## You can learn more by inspecting $default_diagnostics, $backend_diagnostics 
-## and/or investigating $outputs/$messages/$warnings for detailed output from the backend.
-
-results_regression_3 <- compute_results(datasets_regression, backend_regression_3, 
+                    cache_location = file.path(cache_dir, "regression2"))
+
## Results loaded from cache file 'regression2'
+
##  - 1 (10%) fits had at least one Rhat > 1.01. Largest Rhat was 1.011.
+
##  - 6 (60%) fits had some steps rejected. Maximum number of rejections was 4.
+
## Not all diagnostics are OK.
+## You can learn more by inspecting $default_diagnostics, $backend_diagnostics 
+## and/or investigating $outputs/$messages/$warnings for detailed output from the backend.
+
+results_regression_3 <- compute_SBC(datasets_regression, backend_regression_3, 
                     cache_mode = "results", 
-                    cache_location = file.path(cache_dir, "regression3"))
-
## Results loaded from cache file 'regression3'
-
##  - 4 (40%) fits had some steps rejected. Maximum number of rejections was 3.
-
## Not all diagnostics are OK.
-## You can learn more by inspecting $default_diagnostics, $backend_diagnostics 
-## and/or investigating $outputs/$messages/$warnings for detailed output from the backend.
+ cache_location = file.path(cache_dir, "regression3"))
+
## Results loaded from cache file 'regression3'
+
##  - 3 (30%) fits had some steps rejected. Maximum number of rejections was 3.
+
## Not all diagnostics are OK.
+## You can learn more by inspecting $default_diagnostics, $backend_diagnostics 
+## and/or investigating $outputs/$messages/$warnings for detailed output from the backend.

Here we also use the caching feature to avoid recomputing the fits when recompiling this vignette. In practice, caching is not necessary but is often useful.

-
-plot_ecdf_diff(results_regression_1)
-

+plot_ecdf_diff(results_regression_1)
+

+
 plot_rank_hist(results_regression_1)

As far as a quick SBC can see, the first program is OK. You could verify further with more simulations, but we tested the model for you and it is OK (although the implementation is not the best one).

-
-plot_ecdf_diff(results_regression_2)
-

+plot_ecdf_diff(results_regression_2)
+

+
 plot_rank_hist(results_regression_2)

-

But the second model is actually not looking good. In fact there is an indexing bug. The problem is the line mu[i] += beta[j] * x[j, j]; which should have x[i, j] instead. We see that this propagates most strongly to the sigma parameter (reusing the same x element leads to more similar predictions for each row, so sigma needs to be inflated to accommodate this)

-
-plot_ecdf_diff(results_regression_3)
-

+

But the second model is actually not looking good. In fact, there is an indexing bug. The problem is the line mu[i] += beta[j] * x[j, j]; - it should use x[i, j] instead. We see that this propagates most strongly to the sigma variable (reusing the same x element leads to more similar predictions for each row, so sigma needs to be inflated to accommodate this).

+plot_ecdf_diff(results_regression_3)
+

+
 plot_rank_hist(results_regression_3)

And the third model looks OK once again - and in fact we are pretty sure it is also completely correct.

@@ -336,11 +347,13 @@

2021-09-26

-

Site built with pkgdown 1.6.1.

+

+

Site built with pkgdown 2.0.1.

@@ -349,5 +362,7 @@

2021-09-26

+ + diff --git a/docs/articles/indexing_files/figure-html/plots_regression1-1.png b/docs/articles/indexing_files/figure-html/plots_regression1-1.png index 3e37a72..20f115a 100644 Binary files a/docs/articles/indexing_files/figure-html/plots_regression1-1.png and b/docs/articles/indexing_files/figure-html/plots_regression1-1.png differ diff --git a/docs/articles/indexing_files/figure-html/plots_regression1-2.png b/docs/articles/indexing_files/figure-html/plots_regression1-2.png index 0bba616..e5c6015 100644 Binary files a/docs/articles/indexing_files/figure-html/plots_regression1-2.png and b/docs/articles/indexing_files/figure-html/plots_regression1-2.png differ diff --git a/docs/articles/indexing_files/figure-html/plots_regression2-1.png b/docs/articles/indexing_files/figure-html/plots_regression2-1.png index c588bc9..5f31919 100644 Binary files a/docs/articles/indexing_files/figure-html/plots_regression2-1.png and b/docs/articles/indexing_files/figure-html/plots_regression2-1.png differ diff --git a/docs/articles/indexing_files/figure-html/plots_regression2-2.png b/docs/articles/indexing_files/figure-html/plots_regression2-2.png index e70198b..412b451 100644 Binary files a/docs/articles/indexing_files/figure-html/plots_regression2-2.png and b/docs/articles/indexing_files/figure-html/plots_regression2-2.png differ diff --git a/docs/articles/indexing_files/figure-html/plots_regression3-1.png b/docs/articles/indexing_files/figure-html/plots_regression3-1.png index b8d9e7e..1e9e720 100644 Binary files a/docs/articles/indexing_files/figure-html/plots_regression3-1.png and b/docs/articles/indexing_files/figure-html/plots_regression3-1.png differ diff --git a/docs/articles/indexing_files/figure-html/plots_regression3-2.png b/docs/articles/indexing_files/figure-html/plots_regression3-2.png index 8759f61..d21a568 100644 Binary files a/docs/articles/indexing_files/figure-html/plots_regression3-2.png and b/docs/articles/indexing_files/figure-html/plots_regression3-2.png differ diff --git a/docs/articles/indexing_files/header-attrs-2.10/header-attrs.js b/docs/articles/indexing_files/header-attrs-2.10/header-attrs.js deleted file mode 100644 index dd57d92..0000000 --- a/docs/articles/indexing_files/header-attrs-2.10/header-attrs.js +++ /dev/null @@ -1,12 +0,0 @@ -// Pandoc 2.9 adds attributes on both header and div. We remove the former (to -// be compatible with the behavior of Pandoc < 2.8). -document.addEventListener('DOMContentLoaded', function(e) { - var hs = document.querySelectorAll("div.section[class*='level'] > :first-child"); - var i, h, a; - for (i = 0; i < hs.length; i++) { - h = hs[i]; - if (!/^h[1-6]$/i.test(h.tagName)) continue; // it should be a header h1-h6 - a = h.attributes; - while (a.length > 0) h.removeAttribute(a[0].name); - } -}); diff --git a/docs/articles/indexing_files/header-attrs-2.11/header-attrs.js b/docs/articles/indexing_files/header-attrs-2.11/header-attrs.js deleted file mode 100644 index dd57d92..0000000 --- a/docs/articles/indexing_files/header-attrs-2.11/header-attrs.js +++ /dev/null @@ -1,12 +0,0 @@ -// Pandoc 2.9 adds attributes on both header and div. We remove the former (to -// be compatible with the behavior of Pandoc < 2.8). 
-document.addEventListener('DOMContentLoaded', function(e) { - var hs = document.querySelectorAll("div.section[class*='level'] > :first-child"); - var i, h, a; - for (i = 0; i < hs.length; i++) { - h = hs[i]; - if (!/^h[1-6]$/i.test(h.tagName)) continue; // it should be a header h1-h6 - a = h.attributes; - while (a.length > 0) h.removeAttribute(a[0].name); - } -}); diff --git a/docs/articles/limits_of_SBC.html b/docs/articles/limits_of_SBC.html index 24e0de6..4527701 100644 --- a/docs/articles/limits_of_SBC.html +++ b/docs/articles/limits_of_SBC.html @@ -19,6 +19,8 @@ + +
+
@@ -132,56 +137,61 @@

2021-09-26

Here, we’ll walk through some problems that are hard/impossible to diagnose with SBC. As usual the focus is on problems with models, assuming our inference algorithm is correct. But for each of those problems, one can imagine a corresponding failure in an algorithm — although some of those failures are quite unlikely for actual algorithms.

-library(SBC)
-library(ggplot2)
-use_cmdstanr <- TRUE # Set to false to use rstan instead
+library(SBC)
+library(ggplot2)
+use_cmdstanr <- getOption("SBC.vignettes_cmdstanr", TRUE) # Set to false to use rstan instead
 
 if(use_cmdstanr) {
-  library(cmdstanr)
+  library(cmdstanr)
 } else {
-  library(rstan)
+  library(rstan)
+  rstan_options(auto_write = TRUE)
 }
 
-options(mc.cores = parallel::detectCores())
+options(mc.cores = parallel::detectCores())
 
-library(future)
-plan(multisession)
+library(future)
+plan(multisession)
 
 # The fits are very fast,
 # so we force a minimum chunk size to reduce overhead of
 # paralellization and decrease computation time.
-options(SBC.min_chunk_size = 5)
+options(SBC.min_chunk_size = 5)
 
 # Setup caching of results
-cache_dir <- "./limits_SBC_cache"
-if(!dir.exists(cache_dir)) {
-  dir.create(cache_dir)
+if(use_cmdstanr) {
+  cache_dir <- "./_limits_SBC_cache"
+} else {
+  cache_dir <- "./_limits_rstan_SBC_cache"  
+}
+if(!dir.exists(cache_dir)) {
+  dir.create(cache_dir)
 }
-
-

-SBC and minor changes to model

+
+

SBC and minor changes to model +

SBC requires a lot of iterations to discover subtle problems (either with the model or the algorithm).

To demonstrate this, we will fit a simple model with a normal likelihood, but use Student’s t distribution with 5 degrees of freedom to generate the data.

To see the difference, we’ll show the two densities:

-x <- seq(-5, 5, length.out = 100)
-dens_data <- rbind(
-  data.frame(x = x, density = dnorm(x, log = FALSE), 
-             log_density = dnorm(x, log = TRUE), type = "normal()"),
-  data.frame(x = x, density = dt(x, df = 5, log = FALSE), 
-             log_density = dt(x, df = 5, log = TRUE), type = "t(5)")) 
-
-ggplot(dens_data, aes(x = x, y = density, color = type)) +
-  geom_line(size = 2)
+x <- seq(-5, 5, length.out = 100)
+dens_data <- rbind(
+  data.frame(x = x, density = dnorm(x, log = FALSE), 
+             log_density = dnorm(x, log = TRUE), type = "normal()"),
+  data.frame(x = x, density = dt(x, df = 5, log = FALSE), 
+             log_density = dt(x, df = 5, log = TRUE), type = "t(5)")) 
+
+ggplot(dens_data, aes(x = x, y = density, color = type)) +
+  geom_line(size = 2)

As expected, the t distribution has fatter tails, which is even more visible when looking at the logarithm of the density.

-ggplot(dens_data, aes(x = x, y = log_density, color = type)) +
-  geom_line(size = 2)
+ggplot(dens_data, aes(x = x, y = log_density, color = type)) +
+  geom_line(size = 2)

Here is our Stan code for the simple normal model.

-cat(readLines("stan/minor_discrepancy.stan"), sep = "\n")
+cat(readLines("stan/minor_discrepancy.stan"), sep = "\n")
data {
   int<lower=0> N;
   vector[N] y;
@@ -202,93 +212,93 @@ 

iter_sampling <- 1000

if(use_cmdstanr) {
-  model_minor <- cmdstan_model("stan/minor_discrepancy.stan")
+  model_minor <- cmdstan_model("stan/minor_discrepancy.stan")

  backend_minor <- SBC_backend_cmdstan_sample(
    model_minor, iter_warmup = iter_warmup, iter_sampling = iter_sampling, chains = 1)
} else {
-  model_minor <- stan_model("stan/minor_discrepancy.stan")
+  model_minor <- stan_model("stan/minor_discrepancy.stan")

  backend_minor <- SBC_backend_rstan_sample(
    model_minor, iter = iter_sampling + iter_warmup, warmup = iter_warmup, chains = 1)
}

-

And here we simulate from a student’s t distribution. We scale the distribution so that the sigma parameter is the standard deviation of the distribution.

+

And here we simulate from a Student’s t distribution. We scale the distribution so that sigma is its standard deviation.
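Concretely, a standard \(t_\nu\) variable has variance \(\nu/(\nu - 2)\) for \(\nu > 2\), so the generator below (this is just a restatement of its scaling, not extra functionality) draws

\[ y = \mu + \frac{\sigma}{\sqrt{\nu/(\nu-2)}}\, t_\nu, \qquad \text{which gives } \mathrm{sd}(y \mid \mu, \sigma) = \sigma. \]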

-single_dataset_minor <- function(N) {
-  mu <- rnorm(n = 1, mean = 0, sd = 1)
-  sigma <- abs(rnorm(n = 1, mean = 0, sd = 1))
+single_sim_minor <- function(N) {
+  mu <- rnorm(n = 1, mean = 0, sd = 1)
+  sigma <- abs(rnorm(n = 1, mean = 0, sd = 1))
   nu <- 5
-  student_scale <- sigma / sqrt(nu / (nu - 2))
-  y <- mu + student_scale * rt(N, df = nu)
+  student_scale <- sigma / sqrt(nu / (nu - 2))
+  y <- mu + student_scale * rt(N, df = nu)
   
-  list(
-    parameters = list(mu = mu, sigma = sigma),
-    generated = list(N = N, y = y)
+  list(
+    variables = list(mu = mu, sigma = sigma),
+    generated = list(N = N, y = y)
   )
 }
 
-set.seed(51336848)
-generator_minor <- SBC_generator_function(single_dataset_minor, N = 10)
-datasets_minor <- generate_datasets(generator_minor, n_datasets = 200)
-

Can we see something by looking at the results of just the first 10 datasets? (note that SBC_datasets objects support subsetting).

+set.seed(51336848)
+generator_minor <- SBC_generator_function(single_sim_minor, N = 10)
+datasets_minor <- generate_datasets(generator_minor, n_sims = 200)
+

Can we see something by looking at the results of just the first 10 simulations? (note that SBC_datasets objects support subsetting).

-results_minor_10 <- compute_results(datasets_minor[1:10], backend_minor, 
+results_minor_10 <- compute_SBC(datasets_minor[1:10], backend_minor, 
                     cache_mode = "results", 
-                    cache_location = file.path(cache_dir, "minor_10"))
-
## Results loaded from cache file 'minor_10'
-
##  - 1 (10%) fits had at least one Rhat > 1.01. Largest Rhat was 1.024.
-
##  - 2 (20%) fits had some steps rejected. Maximum number of rejections was 1.
-
## Not all diagnostics are OK.
-## You can learn more by inspecting $default_diagnostics, $backend_diagnostics 
-## and/or investigating $outputs/$messages/$warnings for detailed output from the backend.
+ cache_location = file.path(cache_dir, "minor_10"))
+
## Results loaded from cache file 'minor_10'
+
##  - 1 (10%) fits had at least one Rhat > 1.01. Largest Rhat was 1.024.
+
##  - 3 (30%) fits had some steps rejected. Maximum number of rejections was 2.
+
## Not all diagnostics are OK.
+## You can learn more by inspecting $default_diagnostics, $backend_diagnostics 
+## and/or investigating $outputs/$messages/$warnings for detailed output from the backend.

Not really…

 plot_rank_hist(results_minor_10)

-plot_ecdf_diff(results_minor_10)
+plot_ecdf_diff(results_minor_10)

-

Will we have better luck with 100 datasets? (Note that we can use bind_results to combine multiple results, letting us start small, but not throw away the computation spent for the initial SBC runs)

+

Will we have better luck with 100 simulations? (Note that we can use bind_results to combine multiple results, letting us start small without throwing away the computation spent on the initial simulations.)

 results_minor_100 <- bind_results(
   results_minor_10,
-  compute_results(datasets_minor[11:100], backend_minor, 
+  compute_SBC(datasets_minor[11:100], backend_minor, 
                     cache_mode = "results", 
-                    cache_location = file.path(cache_dir, "minor_90"))
+                    cache_location = file.path(cache_dir, "minor_90"))
 )
-
## Results loaded from cache file 'minor_90'
-
##  - 6 (7%) fits had at least one Rhat > 1.01. Largest Rhat was 1.02.
-
##  - 16 (18%) fits had some steps rejected. Maximum number of rejections was 1.
-
## Not all diagnostics are OK.
-## You can learn more by inspecting $default_diagnostics, $backend_diagnostics 
-## and/or investigating $outputs/$messages/$warnings for detailed output from the backend.
-

Here we see something suspicios with the sigma parameter, but it is not very convincing.

+
## Results loaded from cache file 'minor_90'
+
##  - 6 (7%) fits had at least one Rhat > 1.01. Largest Rhat was 1.02.
+
##  - 17 (19%) fits had some steps rejected. Maximum number of rejections was 2.
+
## Not all diagnostics are OK.
+## You can learn more by inspecting $default_diagnostics, $backend_diagnostics 
+## and/or investigating $outputs/$messages/$warnings for detailed output from the backend.
+

Here we see something suspicious with the sigma variable, but it is not very convincing.

 plot_rank_hist(results_minor_100)

-plot_ecdf_diff(results_minor_100)
+plot_ecdf_diff(results_minor_100)

So let’s do an additional 100 SBC steps.

 results_minor_200 <- bind_results(
   results_minor_100,
-  compute_results(datasets_minor[101:200], backend_minor, 
+  compute_SBC(datasets_minor[101:200], backend_minor, 
                     cache_mode = "results", 
-                    cache_location = file.path(cache_dir, "minor_next_100"))
+                    cache_location = file.path(cache_dir, "minor_next_100"))
 )
-
## Results loaded from cache file 'minor_next_100'
-
##  - 8 (8%) fits had at least one Rhat > 1.01. Largest Rhat was 1.019.
-
##  - 13 (13%) fits had some steps rejected. Maximum number of rejections was 2.
-
## Not all diagnostics are OK.
-## You can learn more by inspecting $default_diagnostics, $backend_diagnostics 
-## and/or investigating $outputs/$messages/$warnings for detailed output from the backend.
+
## Results loaded from cache file 'minor_next_100'
+
##  - 8 (8%) fits had at least one Rhat > 1.01. Largest Rhat was 1.019.
+
##  - 15 (15%) fits had some steps rejected. Maximum number of rejections was 2.
+
## Not all diagnostics are OK.
+## You can learn more by inspecting $default_diagnostics, $backend_diagnostics 
+## and/or investigating $outputs/$messages/$warnings for detailed output from the backend.

OK, so this looks at least a bit conclusive, but still, the violation of uniformity is not very big.

 plot_rank_hist(results_minor_200)

-plot_ecdf_diff(results_minor_200)
+plot_ecdf_diff(results_minor_200)

If we used more data points per simulation (here we simulated just 10), the problem would likely show up faster. In any case, we need a relatively large number of runs to identify small discrepancies with high probability.

But it is also the case that the estimates are not completely meaningless (as the distributions are quite close). One way to look into this is to plot the posterior mean + central 90% interval against the simulated value via plot_sim_estimated. The estimates should cluster around the y=x line (blue), which they mostly do.
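A minimal sketch of such a call, assuming the results_minor_200 object built above (the rendered plot is not reproduced in this excerpt):

plot_sim_estimated(results_minor_200)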

@@ -301,52 +311,52 @@

Or we can even directly inspect some intervals of interest:

-coverage <- empirical_coverage(results_minor_200$stats, width = c(0.5,0.9,0.95))
+coverage <- empirical_coverage(results_minor_200$stats, width = c(0.5,0.9,0.95))
 coverage
-
## # A tibble: 6 x 6
-##   parameter width width_represented ci_low estimate ci_high
-##   <chr>     <dbl>             <dbl>  <dbl>    <dbl>   <dbl>
-## 1 mu         0.5              0.495  0.407    0.475   0.544
-## 2 mu         0.9              0.901  0.839    0.89    0.926
-## 3 mu         0.95             0.950  0.936    0.97    0.986
-## 4 sigma      0.5              0.495  0.358    0.425   0.494
-## 5 sigma      0.9              0.901  0.744    0.805   0.854
-## 6 sigma      0.95             0.950  0.862    0.91    0.942
+
## # A tibble: 6 x 6
+##   variable width width_represented ci_low estimate ci_high
+##   <chr>    <dbl>             <dbl>  <dbl>    <dbl>   <dbl>
+## 1 mu        0.5               0.5   0.407    0.475   0.544
+## 2 mu        0.9               0.9   0.845    0.895   0.930
+## 3 mu        0.95              0.95  0.936    0.97    0.986
+## 4 sigma     0.5               0.5   0.363    0.43    0.499
+## 5 sigma     0.9               0.9   0.734    0.795   0.845
+## 6 sigma     0.95              0.95  0.845    0.895   0.930
-sigma_90_coverage_string <- paste0(round(100 * as.numeric(
-  coverage[coverage$parameter == "sigma" & coverage$width == 0.9, c("ci_low","ci_high")])),
+sigma_90_coverage_string <- paste0(round(100 * as.numeric(
+  coverage[coverage$variable == "sigma" & coverage$width == 0.9, c("ci_low","ci_high")])),
   "%",
   collapse = " - ")
-

where we see that for example for the 90% central credible interval of sigma we would expect the actual coverage to be 74% - 85%.

+

where we see that, for example, for the 90% central credible interval of sigma we would expect the actual coverage to be 73% - 85%.

-
-

-Prior mismatch

+
+

Prior mismatch +

SBC can have trouble detecting prior mismatches, especially when those affect only the prior, as SBC is based on the fitted posterior - so if the prior does not influence the posterior very much…

TODO

-
-

-Missing likelihood

+
+

Missing likelihood +

SBC will not notice if you completely omit the likelihood from your Stan model!

Here we have a generator for a very simple model with gaussian likelihood:

-single_dataset_missing <- function(N) {
-  mu <- rnorm(n = 1, mean = 0, sd = 1)
-  y <- rnorm(n = N, mean = mu, sd = 1)
+single_sim_missing <- function(N) {
+  mu <- rnorm(n = 1, mean = 0, sd = 1)
+  y <- rnorm(n = N, mean = mu, sd = 1)
   
-  list(
-    parameters = list(mu = mu),
-    generated = list(N = N, y = y)
+  list(
+    variables = list(mu = mu),
+    generated = list(N = N, y = y)
   )
 }
 
-set.seed(25746223)
-generator_missing <- SBC_generator_function(single_dataset_missing, N = 10)
-datasets_missing <- generate_datasets(generator_missing, n_datasets = 200)
+set.seed(25746223)
+generator_missing <- SBC_generator_function(single_sim_missing, N = 10)
+datasets_missing <- generate_datasets(generator_missing, n_sims = 200)

And here is a model that just completely ignores the data, but has the right prior:

-cat(readLines("stan/missing_likelihood.stan"), sep = "\n")
+cat(readLines("stan/missing_likelihood.stan"), sep = "\n")
data {
   int<lower=0> N;
   vector[N] y;
@@ -364,37 +374,37 @@ 

iter_sampling <- 1000

if(use_cmdstanr) {
-  model_missing <- cmdstan_model("stan/missing_likelihood.stan")
+  model_missing <- cmdstan_model("stan/missing_likelihood.stan")

  backend_missing <- SBC_backend_cmdstan_sample(
    model_missing, iter_warmup = iter_warmup, iter_sampling = iter_sampling, chains = 1)
} else {
-  model_missing <- stan_model("stan/missing_likelihood.stan")
+  model_missing <- stan_model("stan/missing_likelihood.stan")

  backend_missing <- SBC_backend_rstan_sample(
    model_missing, iter = iter_sampling + iter_warmup, warmup = iter_warmup, chains = 1)
}

-

Now we’ll compute the results for 200 simulated datasets:

+

Now we’ll compute the results for 200 simulations:

-results_missing <- compute_results(datasets_missing, backend_missing, 
+results_missing <- compute_SBC(datasets_missing, backend_missing, 
                     cache_mode = "results", 
-                    cache_location = file.path(cache_dir, "missing"))
-
## Results loaded from cache file 'missing'
-
##  - 16 (8%) fits had at least one Rhat > 1.01. Largest Rhat was 1.027.
-
## Not all diagnostics are OK.
-## You can learn more by inspecting $default_diagnostics, $backend_diagnostics 
-## and/or investigating $outputs/$messages/$warnings for detailed output from the backend.
+ cache_location = file.path(cache_dir, "missing"))
+
## Results loaded from cache file 'missing'
+
##  - 15 (8%) fits had at least one Rhat > 1.01. Largest Rhat was 1.027.
+
## Not all diagnostics are OK.
+## You can learn more by inspecting $default_diagnostics, $backend_diagnostics 
+## and/or investigating $outputs/$messages/$warnings for detailed output from the backend.

And here are our rank plots:

 plot_rank_hist(results_missing)

-plot_ecdf_diff(results_missing)
+plot_ecdf_diff(results_missing)

It’s just nothing out of the ordinary.

-

But we are not completely helpless: This specific type of problem can be noticed by prior/posterior contraction plot. In this plot we compare the prior and posterior standard deviation to get a measure of how much more we know about the parameter after fitting the model. For this model, we can get the prior sd directly, but one can also use a (preferably large) SBC_datasets object to estimate it empirically for more complex models.

+

But we are not completely helpless: this specific type of problem can be noticed in a prior/posterior contraction plot. In this plot, we compare the prior and posterior standard deviations to get a measure of how much more we know about the variable after fitting the model. For this model, we can get the prior sd directly, but one can also use a (preferably large) SBC_datasets object to estimate it empirically for more complex models.

-prior_sd <- c("mu" = 1)
+prior_sd <- c("mu" = 1)
 #prior_sd <- calculate_prior_sd(generate_datasets(generator_missing, 1000))
 plot_contraction(results_missing, prior_sd)

@@ -403,43 +413,43 @@

 plot_sim_estimated(results_missing, alpha = 0.5)

-

There is however even more powerful method - and that is to include the likelihood in the SBC. This is most easily done by adding a “generated quantity” to the SBC results - this is a function that is evaluated within the context of the parameters AND data. And it can be added without recomputing the fits!

+

There is, however, an even more powerful method - including the likelihood in the SBC. This is most easily done by adding a “generated quantity” to the SBC results - a function that is evaluated within the context of the variables AND data. And it can be added without recomputing the fits!

 normal_lpdf <- function(y, mu, sigma) {
-  sum(dnorm(y, mean = mu, sd = sigma, log = TRUE))
+  sum(dnorm(y, mean = mu, sd = sigma, log = TRUE))
 }
 
 log_lik_gq <- generated_quantities(log_lik = normal_lpdf(y, mu, 1), 
                                    .globals = "normal_lpdf" )
 
-results_missing_gq <- recompute_statistics(
+results_missing_gq <- recompute_SBC_statistics(
   results_missing, datasets_missing, 
   backend = backend_missing, gen_quants = log_lik_gq)
-
##  - 19 (10%) fits had at least one Rhat > 1.01. Largest Rhat was 1.027.
-
## Not all diagnostics are OK.
-## You can learn more by inspecting $default_diagnostics, $backend_diagnostics 
-## and/or investigating $outputs/$messages/$warnings for detailed output from the backend.
+
##  - 19 (10%) fits had at least one Rhat > 1.01. Largest Rhat was 1.027.
+
## Not all diagnostics are OK.
+## You can learn more by inspecting $default_diagnostics, $backend_diagnostics 
+## and/or investigating $outputs/$messages/$warnings for detailed output from the backend.

The rank plots for the log_lik quantity immediately show a severe problem:

-plot_ecdf_diff(results_missing_gq)
+plot_ecdf_diff(results_missing_gq)

 plot_rank_hist(results_missing_gq)

-
-

-Partially missing likelihood

+
+

Partially missing likelihood +

A more complicated case is when the likelihood is only slightly wrong (and missing something) - e.g. due to an indexing error. It turns out that missing just one data point needs a lot of simulations to detect, so we’ll write a model that ignores a full half of the data points.

-cat(readLines("stan/partially_missing_likelihood.stan"), sep = "\n")
+cat(readLines("stan/partially_missing_likelihood.stan"), sep = "\n")
data {
   int<lower=0> N;
   vector[N] y;
 }
 
 transformed data {
-  int N2 = N / 2 + 1;
+  int N2 = N %/% 2 + 1;
 }
 
 parameters {
@@ -454,37 +464,37 @@ 

}

 if(use_cmdstanr) {
-  model_missing_2 <- cmdstan_model("stan/partially_missing_likelihood.stan")
+  model_missing_2 <- cmdstan_model("stan/partially_missing_likelihood.stan")
 
   backend_missing_2 <- SBC_backend_cmdstan_sample(
     model_missing_2, iter_warmup = iter_warmup, iter_sampling = iter_sampling, chains = 1)
 } else {
-  model_missing_2 <- stan_model("stan/partially_missing_likelihood.stan")
+  model_missing_2 <- stan_model("stan/partially_missing_likelihood.stan")
 
   backend_missing_2 <- SBC_backend_rstan_sample(
     model_missing_2, iter = iter_sampling + iter_warmup, warmup = iter_warmup, chains = 1)
 }
-

Let us use this model for the same dataset.

+

Let us use this model for the same set of simulations.

-results_missing_2 <- compute_results(datasets_missing, backend_missing_2, gen_quants = log_lik_gq, 
+results_missing_2 <- compute_SBC(datasets_missing, backend_missing_2, gen_quants = log_lik_gq, 
                     cache_mode = "results", 
-                    cache_location = file.path(cache_dir, "missing_2"))
-
## Results loaded from cache file 'missing_2'
-
##  - 20 (10%) fits had at least one Rhat > 1.01. Largest Rhat was 1.031.
-
## Not all diagnostics are OK.
-## You can learn more by inspecting $default_diagnostics, $backend_diagnostics 
-## and/or investigating $outputs/$messages/$warnings for detailed output from the backend.
+ cache_location = file.path(cache_dir, "missing_2"))
+
## Results loaded from cache file 'missing_2'
+
##  - 21 (10%) fits had at least one Rhat > 1.01. Largest Rhat was 1.031.
+
## Not all diagnostics are OK.
+## You can learn more by inspecting $default_diagnostics, $backend_diagnostics 
+## and/or investigating $outputs/$messages/$warnings for detailed output from the backend.

The contraction plot would not show anything suspicious - we get decent contraction

-plot_contraction(results_missing_2, prior_sd, parameters = "mu")
+plot_contraction(results_missing_2, prior_sd, variables = "mu")

Similarly, our posterior estimates now cluster around the true values.

-plot_sim_estimated(results_missing_2, parameters = "mu", alpha = 0.5)
+plot_sim_estimated(results_missing_2, variables = "mu", alpha = 0.5)

Now contraction is pretty high, and mu is behaving well, but our log_lik generated quantity shows a clear problem

-plot_ecdf_diff(results_missing_2)
+plot_ecdf_diff(results_missing_2)

 plot_rank_hist(results_missing_2)
@@ -504,11 +514,13 @@

-

Site built with pkgdown 1.6.1.

+

+

Site built with pkgdown 2.0.1.

@@ -517,5 +529,7 @@

+ + diff --git a/docs/articles/limits_of_SBC_files/figure-html/dens_compare-1.png b/docs/articles/limits_of_SBC_files/figure-html/dens_compare-1.png index 8bc73d8..97cc82f 100644 Binary files a/docs/articles/limits_of_SBC_files/figure-html/dens_compare-1.png and b/docs/articles/limits_of_SBC_files/figure-html/dens_compare-1.png differ diff --git a/docs/articles/limits_of_SBC_files/figure-html/log_dens_compare-1.png b/docs/articles/limits_of_SBC_files/figure-html/log_dens_compare-1.png index 46b441f..9364855 100644 Binary files a/docs/articles/limits_of_SBC_files/figure-html/log_dens_compare-1.png and b/docs/articles/limits_of_SBC_files/figure-html/log_dens_compare-1.png differ diff --git a/docs/articles/limits_of_SBC_files/figure-html/results_minor_100_plots-1.png b/docs/articles/limits_of_SBC_files/figure-html/results_minor_100_plots-1.png index 583db53..9cf270f 100644 Binary files a/docs/articles/limits_of_SBC_files/figure-html/results_minor_100_plots-1.png and b/docs/articles/limits_of_SBC_files/figure-html/results_minor_100_plots-1.png differ diff --git a/docs/articles/limits_of_SBC_files/figure-html/results_minor_100_plots-2.png b/docs/articles/limits_of_SBC_files/figure-html/results_minor_100_plots-2.png index e512e85..632a7a4 100644 Binary files a/docs/articles/limits_of_SBC_files/figure-html/results_minor_100_plots-2.png and b/docs/articles/limits_of_SBC_files/figure-html/results_minor_100_plots-2.png differ diff --git a/docs/articles/limits_of_SBC_files/figure-html/results_minor_10_plots-1.png b/docs/articles/limits_of_SBC_files/figure-html/results_minor_10_plots-1.png index 248f995..9d6a8b4 100644 Binary files a/docs/articles/limits_of_SBC_files/figure-html/results_minor_10_plots-1.png and b/docs/articles/limits_of_SBC_files/figure-html/results_minor_10_plots-1.png differ diff --git a/docs/articles/limits_of_SBC_files/figure-html/results_minor_10_plots-2.png b/docs/articles/limits_of_SBC_files/figure-html/results_minor_10_plots-2.png index c95d527..f748308 100644 Binary files a/docs/articles/limits_of_SBC_files/figure-html/results_minor_10_plots-2.png and b/docs/articles/limits_of_SBC_files/figure-html/results_minor_10_plots-2.png differ diff --git a/docs/articles/limits_of_SBC_files/figure-html/results_minor_200_coverage-1.png b/docs/articles/limits_of_SBC_files/figure-html/results_minor_200_coverage-1.png index eef46b8..6e214a1 100644 Binary files a/docs/articles/limits_of_SBC_files/figure-html/results_minor_200_coverage-1.png and b/docs/articles/limits_of_SBC_files/figure-html/results_minor_200_coverage-1.png differ diff --git a/docs/articles/limits_of_SBC_files/figure-html/results_minor_200_plots-1.png b/docs/articles/limits_of_SBC_files/figure-html/results_minor_200_plots-1.png index 692e195..d8792eb 100644 Binary files a/docs/articles/limits_of_SBC_files/figure-html/results_minor_200_plots-1.png and b/docs/articles/limits_of_SBC_files/figure-html/results_minor_200_plots-1.png differ diff --git a/docs/articles/limits_of_SBC_files/figure-html/results_minor_200_plots-2.png b/docs/articles/limits_of_SBC_files/figure-html/results_minor_200_plots-2.png index d40ad49..678b213 100644 Binary files a/docs/articles/limits_of_SBC_files/figure-html/results_minor_200_plots-2.png and b/docs/articles/limits_of_SBC_files/figure-html/results_minor_200_plots-2.png differ diff --git a/docs/articles/limits_of_SBC_files/figure-html/results_minor_200_sim_estimated-1.png b/docs/articles/limits_of_SBC_files/figure-html/results_minor_200_sim_estimated-1.png index cc10009..daddca8 100644 Binary files 
a/docs/articles/limits_of_SBC_files/figure-html/results_minor_200_sim_estimated-1.png and b/docs/articles/limits_of_SBC_files/figure-html/results_minor_200_sim_estimated-1.png differ diff --git a/docs/articles/limits_of_SBC_files/figure-html/results_missing_2_contraction-1.png b/docs/articles/limits_of_SBC_files/figure-html/results_missing_2_contraction-1.png index c21abbe..ed3b221 100644 Binary files a/docs/articles/limits_of_SBC_files/figure-html/results_missing_2_contraction-1.png and b/docs/articles/limits_of_SBC_files/figure-html/results_missing_2_contraction-1.png differ diff --git a/docs/articles/limits_of_SBC_files/figure-html/results_missing_2_plots-1.png b/docs/articles/limits_of_SBC_files/figure-html/results_missing_2_plots-1.png index 070b3ea..78a5ba0 100644 Binary files a/docs/articles/limits_of_SBC_files/figure-html/results_missing_2_plots-1.png and b/docs/articles/limits_of_SBC_files/figure-html/results_missing_2_plots-1.png differ diff --git a/docs/articles/limits_of_SBC_files/figure-html/results_missing_2_plots-2.png b/docs/articles/limits_of_SBC_files/figure-html/results_missing_2_plots-2.png index 7b6aaf7..999c5f5 100644 Binary files a/docs/articles/limits_of_SBC_files/figure-html/results_missing_2_plots-2.png and b/docs/articles/limits_of_SBC_files/figure-html/results_missing_2_plots-2.png differ diff --git a/docs/articles/limits_of_SBC_files/figure-html/results_missing_2_sim_estimated-1.png b/docs/articles/limits_of_SBC_files/figure-html/results_missing_2_sim_estimated-1.png index 0de2017..40fb32e 100644 Binary files a/docs/articles/limits_of_SBC_files/figure-html/results_missing_2_sim_estimated-1.png and b/docs/articles/limits_of_SBC_files/figure-html/results_missing_2_sim_estimated-1.png differ diff --git a/docs/articles/limits_of_SBC_files/figure-html/results_missing_contraction-1.png b/docs/articles/limits_of_SBC_files/figure-html/results_missing_contraction-1.png index a6313a5..2fe7130 100644 Binary files a/docs/articles/limits_of_SBC_files/figure-html/results_missing_contraction-1.png and b/docs/articles/limits_of_SBC_files/figure-html/results_missing_contraction-1.png differ diff --git a/docs/articles/limits_of_SBC_files/figure-html/results_missing_gq_plots-1.png b/docs/articles/limits_of_SBC_files/figure-html/results_missing_gq_plots-1.png index f1e0127..ad92e11 100644 Binary files a/docs/articles/limits_of_SBC_files/figure-html/results_missing_gq_plots-1.png and b/docs/articles/limits_of_SBC_files/figure-html/results_missing_gq_plots-1.png differ diff --git a/docs/articles/limits_of_SBC_files/figure-html/results_missing_gq_plots-2.png b/docs/articles/limits_of_SBC_files/figure-html/results_missing_gq_plots-2.png index dde28df..fed8f85 100644 Binary files a/docs/articles/limits_of_SBC_files/figure-html/results_missing_gq_plots-2.png and b/docs/articles/limits_of_SBC_files/figure-html/results_missing_gq_plots-2.png differ diff --git a/docs/articles/limits_of_SBC_files/figure-html/results_missing_plots-1.png b/docs/articles/limits_of_SBC_files/figure-html/results_missing_plots-1.png index 7f602b6..e21fee4 100644 Binary files a/docs/articles/limits_of_SBC_files/figure-html/results_missing_plots-1.png and b/docs/articles/limits_of_SBC_files/figure-html/results_missing_plots-1.png differ diff --git a/docs/articles/limits_of_SBC_files/figure-html/results_missing_plots-2.png b/docs/articles/limits_of_SBC_files/figure-html/results_missing_plots-2.png index b305b91..0b8ad1b 100644 Binary files a/docs/articles/limits_of_SBC_files/figure-html/results_missing_plots-2.png 
and b/docs/articles/limits_of_SBC_files/figure-html/results_missing_plots-2.png differ diff --git a/docs/articles/limits_of_SBC_files/figure-html/results_missing_sim_estimated-1.png b/docs/articles/limits_of_SBC_files/figure-html/results_missing_sim_estimated-1.png index e0f1145..908fe60 100644 Binary files a/docs/articles/limits_of_SBC_files/figure-html/results_missing_sim_estimated-1.png and b/docs/articles/limits_of_SBC_files/figure-html/results_missing_sim_estimated-1.png differ diff --git a/docs/articles/limits_of_SBC_files/header-attrs-2.10/header-attrs.js b/docs/articles/limits_of_SBC_files/header-attrs-2.10/header-attrs.js deleted file mode 100644 index dd57d92..0000000 --- a/docs/articles/limits_of_SBC_files/header-attrs-2.10/header-attrs.js +++ /dev/null @@ -1,12 +0,0 @@ -// Pandoc 2.9 adds attributes on both header and div. We remove the former (to -// be compatible with the behavior of Pandoc < 2.8). -document.addEventListener('DOMContentLoaded', function(e) { - var hs = document.querySelectorAll("div.section[class*='level'] > :first-child"); - var i, h, a; - for (i = 0; i < hs.length; i++) { - h = hs[i]; - if (!/^h[1-6]$/i.test(h.tagName)) continue; // it should be a header h1-h6 - a = h.attributes; - while (a.length > 0) h.removeAttribute(a[0].name); - } -}); diff --git a/docs/articles/limits_of_SBC_files/header-attrs-2.11/header-attrs.js b/docs/articles/limits_of_SBC_files/header-attrs-2.11/header-attrs.js deleted file mode 100644 index dd57d92..0000000 --- a/docs/articles/limits_of_SBC_files/header-attrs-2.11/header-attrs.js +++ /dev/null @@ -1,12 +0,0 @@ -// Pandoc 2.9 adds attributes on both header and div. We remove the former (to -// be compatible with the behavior of Pandoc < 2.8). -document.addEventListener('DOMContentLoaded', function(e) { - var hs = document.querySelectorAll("div.section[class*='level'] > :first-child"); - var i, h, a; - for (i = 0; i < hs.length; i++) { - h = hs[i]; - if (!/^h[1-6]$/i.test(h.tagName)) continue; // it should be a header h1-h6 - a = h.attributes; - while (a.length > 0) h.removeAttribute(a[0].name); - } -}); diff --git a/docs/articles/overview_wide.png b/docs/articles/overview_wide.png deleted file mode 100644 index ec8847c..0000000 Binary files a/docs/articles/overview_wide.png and /dev/null differ diff --git a/docs/articles/rank_visualizations.html b/docs/articles/rank_visualizations.html new file mode 100644 index 0000000..061b7c8 --- /dev/null +++ b/docs/articles/rank_visualizations.html @@ -0,0 +1,324 @@ + + + + + + + +SBC rank visualizations • SBC + + + + + + + + + + + + +
+
+ + + + +
+
+ + + + +

This vignette aims to explain how to interpret various visualizations of the main results of SBC: the posterior ranks. If the model + algorithm works correctly, the posterior ranks of the prior draws should be distributed uniformly. While one could test for this uniformity numerically via some sort of statistical test, it is often more informative to look at visualizations that not only tell us whether a problem appears, but also what kind of problem and how severe it is.
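As a conceptual sketch (plain R, not the package’s actual internals): the rank for a single simulation is simply the number of posterior draws that fall below the value used to simulate the data.

# Toy illustration of a single rank; here the "posterior" draws come from the
# same distribution as the prior draw, so over many repetitions the rank is
# uniform on 0..100
set.seed(1)
mu_true <- rnorm(1)           # value drawn from the prior
post_draws <- rnorm(100)      # stand-in posterior draws for mu
rank <- sum(post_draws < mu_true)
rank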

+

In this vignette, we’ll first explain each of the visualizations individually and then show them side-by-side for the same configuration.

+ +
+

Simulation setup +

+

We’ll use the built-in example that simulates ranks from known prior and data-averaged posterior distributions.
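The code below assumes the usual vignette setup; a sketch of the attach calls (the exact setup chunk is not shown in this excerpt - patchwork is assumed only for the combined side-by-side plots near the end):

library(SBC)
library(ggplot2)
library(patchwork)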

+
+set.seed(22654885)
+res_50 <- SBC_example_results("visualizations", n_sims = 50)
+# The example results have this extra attribute showing analytic densities,
+# this is not a part of normal SBC results
+density_df <- attr(res_50, "density_df") 
+

This is how the corresponding analytical densities look - in the “Exact match” case they overlap and SBC should pass; in all other cases it should signal issues.

+
+plot_density_comparison <- function(density_df) {
+  ggplot(density_df, aes(x = x, y = density, color = type, size = type)) + 
+    geom_line(data = dplyr::filter(density_df, type == "Prior")) + 
+    geom_line(data = dplyr::filter(density_df, type == "Data-averaged posterior"), alpha = 0.8) + 
+    facet_wrap(~variable) + 
+    scale_color_manual("", values = c("Prior" = "skyblue1", 
+                                    "Data-averaged posterior" = "black")) + 
+    scale_size_manual("", values = c("Prior" = 2, 
+                                    "Data-averaged posterior" = 1)) +
+    theme(legend.position = "bottom")
+}
+
+plot_density_comparison(density_df)
+

+

Now let’s look at how these issues manifest in various plots:

+
+
+

+plot_rank_hist - The rank histogram +

+

The rank histogram is probably the simplest of the visualizations. We plot a histogram of the ranks and check whether all bins are roughly equally represented. The expected average count is shown as a horizontal black line, and an approximate interval highlighting expected deviations (by default 95%) is shown as the light blue wedged rectangle in the background.
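With the default settings, the call is presumably just (the resulting figure is omitted from this excerpt):

plot_rank_hist(res_50)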

+ +

+

The two main disadvantages of this plot are:

+
    +
  1. Since the confidence interval is only approximate, it cannot be taken too seriously. However, gross violations are still immediately visible.
  2. It is sensitive to the choice of number of bins. There is a tradeoff in that more bins mean more resolution, but less power to detect some violations of uniformity. Additionally, one needs to be mindful of the total number of ranks as the number of bins should preferably divide the number of ranks.
+

This is the reason why the “Some extra-low estimates” case is not visible with the default number of bins - the extra low estimates (and thus high ranks) get smoothed by being combined into a wider bin. We can plot the same results but with the maximum number of bins (100, as the ranks in our case range from 0 to 99):

+
+plot_rank_hist(res_50, bins = 100)
+

+

This lets us see something suspicious for the “Some extra-low estimates” case, but we’ve increased noise overall and the other patterns become harder to see.

+

Additionally, if the number of bins does not divide the total number of ranks (here 100) neatly, some bins are expected to get slightly more ranks than others. The plot compensates for this by extending the confidence interval to cover both cases, resulting in some loss of precision - here a particularly bad choice of the number of bins obscures problems in “Model too uncertain” and “Some extra-low estimates”.

+
+plot_rank_hist(res_50, bins = 17)
+

+

The choice of the number of bins obviously becomes less of a problem if we have a large number of simulations. With 1000 simulations, the patterns are clear and unmistakable.

+
+res_1000 <- SBC_example_results("visualizations", n_sims = 1000)
+plot_rank_hist(res_1000)
+

+

We should also note that since we are essentially performing many comparisons, seeing a small number of “failures” in some bins is to be expected and does not necessarily signal a problem - in the plot above, the “Exact match” case has several bins outside of the approximate confidence interval.

+
+
+

+plot_ecdf and plot_ecdf_diff - ECDF plots +

+

These two related plots remedy the main problems of the rank histogram - they do not depend on any binning and provide exact confidence intervals. The ECDF plot shows the empirical cumulative distribution function (ECDF). If ranks were perfectly uniform, this would be a “diagonal staircase”, but some deviations from exact uniformity are to be expected. The ECDF plot shows an ellipse outlining the expected deviations (by default at the 95% level). It looks like this:

+
+plot_ecdf(res_50)
+

+

A minor problem with this visualization is that the top-left and bottom-right parts of the plot are usually left unused, and as the number of simulations grows, it may become hard to discern details in the center. Let us look at the same plot from 1000 simulations:

+
+plot_ecdf(res_1000)
+

+

Now it gets a bit hard to see whether the “Exact match” case is well within the ellipse or rather hitting the boundaries.

+

The ECDF diff plot contains exactly the same information as the ECDF plot, but shows not the ECDF itself, but rather the difference between the perfectly uniform CDF and the ECDF. In other words, it rotates the ECDF plot by 45 degrees to the right to make the uniform CDF a flat line:

+
+plot_ecdf_diff(res_1000)
+

+

Now we get much better resolution for the “Exact match” case. Also note that in the rank histogram the “Some extra-low estimates” case showed up only as a failure in the highest ranks. However, the ECDF and ECDF diff plots also show how the slight under-abundance of low ranks - which is not noticeable when looking at each rank / rank bin individually - slowly adds up, and by the 50th percentile we already see a problem.

+

The ECDF diff plot usually looks better than the ECDF plot even with a lower number of simulations and is thus preferable to the ECDF plot in most cases:
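Presumably the call here is simply (figure omitted from this excerpt):

plot_ecdf_diff(res_50)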

+ +

+

A downside of the ECDF and especially the ECDF diff plot is that the connection between the shape seen in the plot and the type of failure is less straightforward.

+
+
+

+plot_coverage and plot_coverage_diff - Empirical coverage +

+

The rank histogram and both ECDF plots are useful for noticing that there is a problem in the model and what type of mismatch we are seeing. However, it is a bit harder to interpret how bad the failures actually are for inference and how large a problem could still go unnoticed because we ran too few simulations.

+

The empirical coverage plot tries to help with that. It builds on the empirical_coverage() function and by default shows the coverage of the central posterior credible intervals (coverage is the proportion of true variable values that fall within the interval). A well-working model would have coverage exactly matching the interval width (i.e. the 95% credible interval contains the true value 95% of the time), as shown by the blue line. The focus on central intervals is often more relevant to inference than the leftmost intervals implied in the ECDF plots. The coverage is accompanied by approximate credible intervals for the coverage (gray).

+
+plot_coverage(res_50)
+

+

This lets us neatly see that with 50 simulations, we still cannot rule out even relatively large miscalibration in the “Exact match” case, where e.g. the 50% central interval could still contain about 70% of the true values. A downside of the focus on central intervals is that underestimation and overestimation now produce the same overall shape in the plot.
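For a numeric rather than graphical summary, the underlying function can also be called directly; a sketch using the res_50 object from above (output not shown):

empirical_coverage(res_50$stats, width = c(0.5, 0.9, 0.95))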

+

For similar reasons as with the ECDF plot, there is also a “difference” version of the plot that puts the differences in coverage into focus.
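The corresponding call is presumably (figure omitted from this excerpt):

plot_coverage_diff(res_50)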

+ +

+

In the example here, all the problematic scenarios also manifest as problems on the empirical coverage plot. However, empirical coverage, especially for the central intervals, has some notable limitations as a diagnostic and should thus always be complemented by a rank histogram / ECDF plot - see help(empirical_coverage) for some additional details.

+
+
+

Side by side comparison +

+

To let us better understand how the various plots relate, we will now plot the scenarios one by one, showing all plots for the same scenario side-by-side.

+
+plot_side_by_side <- function(res, var) {
+  legend_bottom <- theme(legend.position = "bottom",
+                         legend.direction = "vertical",
+                         legend.margin = margin(t=-1, unit = "cm")
+                         )
+  # Hack - use variable name to show plot type
+  density_df_to_plot <- dplyr::filter(density_df, variable == var)
+  density_df_to_plot$variable <- "Densities"
+  
+  stats <- dplyr::filter(res$stats, variable == var)
+  
+  p_dens <- plot_density_comparison(density_df_to_plot) +
+    legend_bottom
+  p_rank <- plot_rank_hist(dplyr::mutate(stats, variable = "Rank histogram"))
+  p_ecdf <- plot_ecdf(dplyr::mutate(stats, variable = "ECDF")) + legend_bottom
+  p_ecdf_diff <- plot_ecdf_diff(dplyr::mutate(stats, variable = "ECDF diff")) + legend_bottom
+  p_coverage <- plot_coverage(dplyr::mutate(stats, variable = "Coverage"))
+  p_coverage_diff <- plot_coverage_diff(dplyr::mutate(stats, variable = "Coverage diff"))
+  p_dens  + p_ecdf + p_ecdf_diff + p_rank + p_coverage + p_coverage_diff +
+    plot_annotation(var)
+}
+

We will start with the “Exact match” (i.e. no problem) scenario with 50 simulations.

+
+plot_side_by_side(res_50, "Exact match")
+

+

The relative utility of the _diff versions of the plots changes if we have more simulations:

+
+plot_side_by_side(res_1000, "Exact match")
+

+

If the model is too certain, it will have an over-abundance of extreme ranks and lower-than-expected coverage.

+
+plot_side_by_side(res_50, "Model too certain")
+

+

If the model is overly uncertain, it will have an overabundance of central ranks (and too few extreme ranks) and the coverage will be higher than expected.

+
+plot_side_by_side(res_50, "Model too uncertain")
+

+

If the model is underestimating, we will see too many high ranks and coverage will be lower than expected.

+
+plot_side_by_side(res_50, "Model underestimating")
+

+

If the model is overestimating, we will see too many low ranks, while the effect on central interval coverage will be similar to underestimation: the coverage will be lower than expected.

+
+plot_side_by_side(res_50, "Model overestimating")
+

+
+
+ + + +
+ + + +
+ +
+

+

Site built with pkgdown 2.0.1.

+
+ +
+
+ + + + + + + + diff --git a/docs/articles/rank_visualizations_files/figure-html/coverage_50-1.png b/docs/articles/rank_visualizations_files/figure-html/coverage_50-1.png new file mode 100644 index 0000000..0156f7e Binary files /dev/null and b/docs/articles/rank_visualizations_files/figure-html/coverage_50-1.png differ diff --git a/docs/articles/rank_visualizations_files/figure-html/coverage_diff_50-1.png b/docs/articles/rank_visualizations_files/figure-html/coverage_diff_50-1.png new file mode 100644 index 0000000..36f6b0d Binary files /dev/null and b/docs/articles/rank_visualizations_files/figure-html/coverage_diff_50-1.png differ diff --git a/docs/articles/rank_visualizations_files/figure-html/densities-1.png b/docs/articles/rank_visualizations_files/figure-html/densities-1.png new file mode 100644 index 0000000..c628aa8 Binary files /dev/null and b/docs/articles/rank_visualizations_files/figure-html/densities-1.png differ diff --git a/docs/articles/rank_visualizations_files/figure-html/ecdf_1000-1.png b/docs/articles/rank_visualizations_files/figure-html/ecdf_1000-1.png new file mode 100644 index 0000000..05b6ec4 Binary files /dev/null and b/docs/articles/rank_visualizations_files/figure-html/ecdf_1000-1.png differ diff --git a/docs/articles/rank_visualizations_files/figure-html/ecdf_50-1.png b/docs/articles/rank_visualizations_files/figure-html/ecdf_50-1.png new file mode 100644 index 0000000..b089bc8 Binary files /dev/null and b/docs/articles/rank_visualizations_files/figure-html/ecdf_50-1.png differ diff --git a/docs/articles/rank_visualizations_files/figure-html/ecdf_diff_1000-1.png b/docs/articles/rank_visualizations_files/figure-html/ecdf_diff_1000-1.png new file mode 100644 index 0000000..675f1fe Binary files /dev/null and b/docs/articles/rank_visualizations_files/figure-html/ecdf_diff_1000-1.png differ diff --git a/docs/articles/rank_visualizations_files/figure-html/ecdf_diff_50-1.png b/docs/articles/rank_visualizations_files/figure-html/ecdf_diff_50-1.png new file mode 100644 index 0000000..7d661f0 Binary files /dev/null and b/docs/articles/rank_visualizations_files/figure-html/ecdf_diff_50-1.png differ diff --git a/docs/articles/rank_visualizations_files/figure-html/rank_hist_1000-1.png b/docs/articles/rank_visualizations_files/figure-html/rank_hist_1000-1.png new file mode 100644 index 0000000..b4cf808 Binary files /dev/null and b/docs/articles/rank_visualizations_files/figure-html/rank_hist_1000-1.png differ diff --git a/docs/articles/rank_visualizations_files/figure-html/rank_hist_50_100-1.png b/docs/articles/rank_visualizations_files/figure-html/rank_hist_50_100-1.png new file mode 100644 index 0000000..84be18d Binary files /dev/null and b/docs/articles/rank_visualizations_files/figure-html/rank_hist_50_100-1.png differ diff --git a/docs/articles/rank_visualizations_files/figure-html/rank_hist_50_17-1.png b/docs/articles/rank_visualizations_files/figure-html/rank_hist_50_17-1.png new file mode 100644 index 0000000..f3f06b4 Binary files /dev/null and b/docs/articles/rank_visualizations_files/figure-html/rank_hist_50_17-1.png differ diff --git a/docs/articles/rank_visualizations_files/figure-html/rank_hist_50_default-1.png b/docs/articles/rank_visualizations_files/figure-html/rank_hist_50_default-1.png new file mode 100644 index 0000000..88e98d7 Binary files /dev/null and b/docs/articles/rank_visualizations_files/figure-html/rank_hist_50_default-1.png differ diff --git a/docs/articles/rank_visualizations_files/figure-html/side_by_side_1000_exact_match-1.png 
b/docs/articles/rank_visualizations_files/figure-html/side_by_side_1000_exact_match-1.png new file mode 100644 index 0000000..1772953 Binary files /dev/null and b/docs/articles/rank_visualizations_files/figure-html/side_by_side_1000_exact_match-1.png differ diff --git a/docs/articles/rank_visualizations_files/figure-html/side_by_side_50_exact_match-1.png b/docs/articles/rank_visualizations_files/figure-html/side_by_side_50_exact_match-1.png new file mode 100644 index 0000000..10b2209 Binary files /dev/null and b/docs/articles/rank_visualizations_files/figure-html/side_by_side_50_exact_match-1.png differ diff --git a/docs/articles/rank_visualizations_files/figure-html/side_by_side_50_overest-1.png b/docs/articles/rank_visualizations_files/figure-html/side_by_side_50_overest-1.png new file mode 100644 index 0000000..f3220be Binary files /dev/null and b/docs/articles/rank_visualizations_files/figure-html/side_by_side_50_overest-1.png differ diff --git a/docs/articles/rank_visualizations_files/figure-html/side_by_side_50_too_certain-1.png b/docs/articles/rank_visualizations_files/figure-html/side_by_side_50_too_certain-1.png new file mode 100644 index 0000000..66e3200 Binary files /dev/null and b/docs/articles/rank_visualizations_files/figure-html/side_by_side_50_too_certain-1.png differ diff --git a/docs/articles/rank_visualizations_files/figure-html/side_by_side_50_too_uncertain-1.png b/docs/articles/rank_visualizations_files/figure-html/side_by_side_50_too_uncertain-1.png new file mode 100644 index 0000000..cd1a13f Binary files /dev/null and b/docs/articles/rank_visualizations_files/figure-html/side_by_side_50_too_uncertain-1.png differ diff --git a/docs/articles/rank_visualizations_files/figure-html/side_by_side_50_underest-1.png b/docs/articles/rank_visualizations_files/figure-html/side_by_side_50_underest-1.png new file mode 100644 index 0000000..d1d1ae8 Binary files /dev/null and b/docs/articles/rank_visualizations_files/figure-html/side_by_side_50_underest-1.png differ diff --git a/docs/articles/rejection_sampling.html b/docs/articles/rejection_sampling.html index 308c47e..a7444d1 100644 --- a/docs/articles/rejection_sampling.html +++ b/docs/articles/rejection_sampling.html @@ -5,13 +5,13 @@ -Rejection sampling in dataset generation • SBC +Rejection sampling in simulations • SBC - + + +
+
-

In some cases, one may want to exclude extreme datasets from SBC (e.g. because those datasets create divergences). It is best to use prior predictive checks to examine your priors and change them to avoid the extreme datasets. In some cases, this may however be impractical/impossible to do via prior choice - one example are regression coefficients, where once we have many predictors, any independent prior that is not very strict will lead to unrealistic predictions. Joint priors are needed in such case, but those are not well understood and easy to use. See Paul Bürkner’s talk on SBC StanConnect for more context.

-

An alternative is to use rejection sampling i.e. we repeatedly generate a dataset and only accept it as a dataset when it passes a certain condition we impose (e.g. that no observed count is larger than \(10^8\)). But does rejection sampling when generating datasets affect the validity of SBC?

-

Thanks to forum user Niko Huurre who derived the necessary math at Stan Discourse discussion of the topic we know exactly when it is OK. Briefly: for algorithms that only need to know the posterior density up to a constant (which includes Stan and many others), it is OK as long as the rejection criterion only uses observed data and not the unobserved parameters.

+

In some cases, one may want to exclude extreme simulations from SBC (e.g. because those simulations create divergences). It is best to use prior predictive checks to examine your priors and change them to avoid extremes in the simulated data. In some cases, this may however be impractical/impossible to do via prior choice - one example is regression coefficients, where once we have many predictors, any independent prior that is not very strict will lead to unrealistic predictions. Joint priors are needed in such cases, but those are not well understood and not easy to use. See Paul Bürkner’s talk at SBC StanConnect for more context.

+

An alternative is to use rejection sampling, i.e. we repeatedly generate a simulation and only accept it when it passes a certain condition we impose (e.g. that no observed count is larger than \(10^8\)). But does rejection sampling when generating simulations affect the validity of SBC?
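A generic sketch of this repeat-until-accepted pattern, using a hypothetical data-only condition (the concrete generators later in this vignette follow the same structure):

# Hypothetical sketch: keep simulating until a condition on the *observed data* holds
single_sim_capped <- function(N) {
  repeat {
    mu <- rnorm(1, 0, 2)
    y <- rnorm(N, mu, 1)
    if (max(abs(y)) < 1e8) break  # the condition uses only observed data, not mu
  }
  list(variables = list(mu = mu), generated = list(N = N, y = y))
}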

+

Thanks to forum user Niko Huurre, who derived the necessary math in a Stan Discourse discussion of the topic, we know exactly when it is OK. Briefly: for algorithms that only need to know the posterior density up to a constant (which includes Stan and many others), it is OK as long as the rejection criterion only uses observed data and not the unobserved variables.

We’ll first walk through the math and then show examples of both OK and problematic rejection sampling.

-
-

-The math

-

Let \(f\left(y\right)\) be the probability that the simulated dataset \(y\) is rejected (usually a 0-1 function if you have a clear idea what a “bad” dataset looks like, but could be probabilistic if you’re relying on finicky diagnostics). The important numbers are the probability of rejection for parameter \(\theta\)

+
+

The math +

+

Let \(f\left(y\right)\) be the probability that the simulated data \(y\) is accepted, i.e. not rejected (usually a 0-1 function if you have a clear idea what a “bad” dataset looks like, but could be probabilistic if you’re relying on finicky diagnostics). The important quantities are the acceptance probability for variable \(\theta\)

\[ L\left(\theta\right)=\int f\left(y\right)\pi\left(y|\theta\right)\mathrm{d}y \]

@@ -145,7 +150,7 @@

\[ R=\iint f\left(y\right)\pi\left(y|\theta\right)\pi\left(\theta\right)\mathrm{d}y\mathrm{d}\theta=\int L\left(\theta\right)\pi\left(\theta\right)\mathrm{d}\theta \]

-

Rejecting the parameter draw when it generates a “bad” dataset effectively distorts the prior

+

Rejecting the variable draw when it generates “bad” data effectively distorts the prior

\[ \pi\left(\theta\right)\to\frac{L\left(\theta\right)}{R}\pi\left(\theta\right) \]

@@ -161,39 +166,44 @@

\[ \pi(\theta | y) \propto \frac{L(\theta)}{R} \pi(y | \theta) \frac{f(y)}{L(\theta)} \pi(\theta) = \frac{f(y)}{R} \pi(y | \theta) \pi(\theta) \]

-

And since \(\frac{f(y)}{R}\) is a constant for any given dataset (and hence the fit), the overall posterior for Stan (and most other MCMC algorithms) is the same, because Stan only needs the posterior density up to a constant. So whether we take rejection into account or not, the model will match the generating process. However, if \(f\) also depended on \(\theta\), it would no longer contribute a constant and we’ll get a mismatch between the generator and model.

+

And since \(\frac{f(y)}{R}\) is a constant for any given simulation (and hence the fit), the overall posterior for Stan (and most other MCMC algorithms) is the same, because Stan only needs the posterior density up to a constant. So whether we take rejection into account or not, the model will match the generating process. However, if \(f\) also depended on \(\theta\), it would no longer contribute a constant and we would get a mismatch between the generator and the model.

-
-

-Practical examples

+
+

Practical examples +

So let’s see if that also happens in practice. Let’s set up our environment:

-library(SBC)
+library(SBC)
 
-use_cmdstanr <- TRUE # Set to false to use rstan instead
+use_cmdstanr <- getOption("SBC.vignettes_cmdstanr", TRUE) # Set to false to use rstan instead
 
 if(use_cmdstanr) {
-  library(cmdstanr)
+  library(cmdstanr)
 } else {
-  library(rstan)
+  library(rstan)
+  rstan_options(auto_write = TRUE)
 }
 
-library(bayesplot)
-library(posterior)
+library(bayesplot)
+library(posterior)
 
-library(future)
-plan(multisession) 
+library(future)
+plan(multisession) 
 
-options(SBC.min_chunk_size = 10)
+options(SBC.min_chunk_size = 10)
 
 # Setup caching of results
-cache_dir <- "./rejection_sampling_SBC_cache"
-if(!dir.exists(cache_dir)) {
-  dir.create(cache_dir)
+if(use_cmdstanr) {
+  cache_dir <- "./_rejection_sampling_SBC_cache"
+} else {
+  cache_dir <- "./_rejection_sampling_rstan_SBC_cache"
+}
+if(!dir.exists(cache_dir)) {
+  dir.create(cache_dir)
 }

We’ll use a very simple model throughout this vignette:

-cat(readLines("stan/rejection_sampling.stan"), sep = "\n")
+cat(readLines("stan/rejection_sampling.stan"), sep = "\n")
data {
    int<lower=0> N;
    real y[N];
@@ -209,121 +219,121 @@ 

}

 if(use_cmdstanr) {
-  backend <- SBC_backend_cmdstan_sample(cmdstan_model("stan/rejection_sampling.stan"), iter_warmup = 800, iter_sampling = 800)
+  backend <- SBC_backend_cmdstan_sample(cmdstan_model("stan/rejection_sampling.stan"), iter_warmup = 800, iter_sampling = 800)
 } else {
-  backend <- SBC_backend_rstan_sample(stan_model("stan/rejection_sampling.stan"), iter = 1600, warmup = 800)
+  backend <- SBC_backend_rstan_sample(stan_model("stan/rejection_sampling.stan"), iter = 1600, warmup = 800)
 }
-
-

-No rejections

+
+

No rejections +

First, we’ll use a generator that matches the model exactly.

 N <- 10
 generator <- SBC_generator_function(function() {
-   mu <- rnorm(1, 0, 2)
-   list(
-     parameters = list(mu = mu),
-     generated = list(N = N, y = rnorm(N, mu, 1))
+   mu <- rnorm(1, 0, 2)
+   list(
+     variables = list(mu = mu),
+     generated = list(N = N, y = rnorm(N, mu, 1))
    )
 })

So we expect the SBC to pass even with a large number of fits.

-set.seed(2323455)
+set.seed(2323455)
 datasets <- generate_datasets(generator, 1000)
-results <- compute_results(datasets, backend, keep_fits = FALSE, 
+results <- compute_SBC(datasets, backend, keep_fits = FALSE, 
                     cache_mode = "results", 
-                    cache_location = file.path(cache_dir, "no_rejections"))
-
## Results loaded from cache file 'no_rejections'
-
##  - 1 (0%) fits had at least one Rhat > 1.01. Largest Rhat was 1.011.
-
## Not all diagnostics are OK.
-## You can learn more by inspecting $default_diagnostics, $backend_diagnostics 
-## and/or investigating $outputs/$messages/$warnings for detailed output from the backend.
+ cache_location = file.path(cache_dir, "no_rejections"))
+
## Results loaded from cache file 'no_rejections'
+
##  - 1 (0%) fits had at least one Rhat > 1.01. Largest Rhat was 1.011.
+
## Not all diagnostics are OK.
+## You can learn more by inspecting $default_diagnostics, $backend_diagnostics 
+## and/or investigating $outputs/$messages/$warnings for detailed output from the backend.
-plot_ecdf_diff(results)
+plot_ecdf_diff(results)

 plot_rank_hist(results)

Indeed, all looks good.

-
-

-Rejection based on parameter values

-

Now let us modify the generator to reject based on parameter values.

+
+

Rejection based on unobserved variables +

+

Now let us modify the generator to reject based on values of an unobserved variable.

-generator_reject_param <- SBC_generator_function(function() {
+generator_reject_unobserved <- SBC_generator_function(function() {
    repeat {
-    mu <- rnorm(1, 0, 2)
+    mu <- rnorm(1, 0, 2)
     if(mu > 3) {
       break
     }
    }
-   list(
-     parameters = list(mu = mu),
-     generated = list(N = N, y = rnorm(N, mu, 1))
+   list(
+     variables = list(mu = mu),
+     generated = list(N = N, y = rnorm(N, mu, 1))
    )
 })

We don’t even need to run very many fits to see the problem.

-set.seed(21455)
-datasets_reject_param <- generate_datasets(generator_reject_param, 200)
+set.seed(21455)
+datasets_reject_unobserved <- generate_datasets(generator_reject_unobserved, 200)
-results_reject_param <- compute_results(datasets_reject_param, backend, keep_fits = FALSE, 
+results_reject_unobserved <- compute_SBC(datasets_reject_unobserved, backend, keep_fits = FALSE, 
                     cache_mode = "results", 
-                    cache_location = file.path(cache_dir, "reject_param"))
-
## Results loaded from cache file 'reject_param'
+ cache_location = file.path(cache_dir, "reject_unobserved"))
+
## Results loaded from cache file 'reject_unobserved'
-plot_ecdf_diff(results_reject_param)
-

+plot_ecdf_diff(results_reject_unobserved)
+

-plot_rank_hist(results_reject_param)
-

+plot_rank_hist(results_reject_unobserved)
+

Indeed, we see a clear failure.

-
-

-Rejecting based on data

-

But what if we reject based on the values of data? This should in theory result in just a constant change in posterior density and not affect SBC. (SBC will however then check only the non-rejected parts of the data space). We will do a relatively aggressive rejection scheme (reject more than 50% of datasets).

+
+

Rejecting based on data +

+

But what if we reject based on the values of data? This should in theory result in just a constant change in posterior density and not affect SBC. (SBC will however then check only the non-rejected parts of the data space). We will do a relatively aggressive rejection scheme (reject more than 50% of simulations).

 generator_reject_y <- SBC_generator_function(function() {
    repeat {
-    mu <- rnorm(1, 0, 2)
-    y <- rnorm(N, mu, 1)
-    if(mean(y) > 5) {
+    mu <- rnorm(1, 0, 2)
+    y <- rnorm(N, mu, 1)
+    if(mean(y) > 5) {
       break
     }
    }
-   list(
-     parameters = list(mu = mu),
-     generated = list(N = N, y = y)
+   list(
+     variables = list(mu = mu),
+     generated = list(N = N, y = y)
    )
 })
-set.seed(369654)
+set.seed(369654)
 datasets_reject_y <- generate_datasets(generator_reject_y, 1000)
-results_reject_y <- compute_results(datasets_reject_y, backend, keep_fits = FALSE, 
+results_reject_y <- compute_SBC(datasets_reject_y, backend, keep_fits = FALSE, 
                     cache_mode = "results", 
-                    cache_location = file.path(cache_dir, "reject_y"))
-
## Results loaded from cache file 'reject_y'
-
##  - 1 (0%) fits had at least one Rhat > 1.01. Largest Rhat was 1.01.
-
## Not all diagnostics are OK.
-## You can learn more by inspecting $default_diagnostics, $backend_diagnostics 
-## and/or investigating $outputs/$messages/$warnings for detailed output from the backend.
+ cache_location = file.path(cache_dir, "reject_y"))
+
## Results loaded from cache file 'reject_y'
+
##  - 1 (0%) fits had at least one Rhat > 1.01. Largest Rhat was 1.01.
+
## Not all diagnostics are OK.
+## You can learn more by inspecting $default_diagnostics, $backend_diagnostics 
+## and/or investigating $outputs/$messages/$warnings for detailed output from the backend.
 plot_rank_hist(results_reject_y)

-plot_ecdf_diff(results_reject_y)
+plot_ecdf_diff(results_reject_y)

We see that even with quite heavy rejection based on y, SBC still passes at a fairly high resolution.

-
-

-Take home message

-

If our priors can sometimes generate datasets that are unrealistic, but we are unable to specify a better prior directly (e.g. because we would need to define some sort of joint prior), we can use rejection sampling to prune unrealistic datasets as long as we only filter by the observed data and don’t directly use any unobserved parameter values. Notably, filtering based on divergences or other fitting issues is also just a function of data and thus permissible. The resulting SBC will however provide guarantees only for datasets that would not be rejected by the same criteria.

+
+

Take home message +

+

If our priors can sometimes result in simulated data that is unrealistic, but we are unable to specify a better prior directly (e.g. because we would need to define some sort of joint prior), we can use rejection sampling to prune unrealistic simulations as long as we only filter by the observed data and don’t directly use any unobserved variable values. Notably, filtering based on divergences or other fitting issues is also just a function of data and thus permissible. The resulting SBC will however provide guarantees only for data that would not be rejected by the same criteria.
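To make the filtering idea concrete, here is a minimal sketch of dropping fits with divergent transitions before inspecting the SBC plots - a criterion that depends only on the fit (and hence on the data), not on the simulated variable values. It assumes an SBC results object for which backend diagnostics are available; the object names are just placeholders:

ids_keep <- results$backend_diagnostics$sim_id[
  results$backend_diagnostics$n_divergent == 0]
results_no_divergences <- results[ids_keep]
plot_ecdf_diff(results_no_divergences)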

@@ -338,11 +348,13 @@

-

Site built with pkgdown 1.6.1.

+

+

Site built with pkgdown 2.0.1.

@@ -351,5 +363,7 @@

+ + diff --git a/docs/articles/rejection_sampling_files/figure-html/reject_param_plots-1.png b/docs/articles/rejection_sampling_files/figure-html/reject_param_plots-1.png deleted file mode 100644 index 2acf00d..0000000 Binary files a/docs/articles/rejection_sampling_files/figure-html/reject_param_plots-1.png and /dev/null differ diff --git a/docs/articles/rejection_sampling_files/figure-html/reject_param_plots-2.png b/docs/articles/rejection_sampling_files/figure-html/reject_param_plots-2.png deleted file mode 100644 index 80ff0ad..0000000 Binary files a/docs/articles/rejection_sampling_files/figure-html/reject_param_plots-2.png and /dev/null differ diff --git a/docs/articles/rejection_sampling_files/figure-html/reject_unobserved_plots-1.png b/docs/articles/rejection_sampling_files/figure-html/reject_unobserved_plots-1.png new file mode 100644 index 0000000..7204e9a Binary files /dev/null and b/docs/articles/rejection_sampling_files/figure-html/reject_unobserved_plots-1.png differ diff --git a/docs/articles/rejection_sampling_files/figure-html/reject_unobserved_plots-2.png b/docs/articles/rejection_sampling_files/figure-html/reject_unobserved_plots-2.png new file mode 100644 index 0000000..c97f8bd Binary files /dev/null and b/docs/articles/rejection_sampling_files/figure-html/reject_unobserved_plots-2.png differ diff --git a/docs/articles/rejection_sampling_files/figure-html/reject_y_plot-1.png b/docs/articles/rejection_sampling_files/figure-html/reject_y_plot-1.png index 7544308..9caeedb 100644 Binary files a/docs/articles/rejection_sampling_files/figure-html/reject_y_plot-1.png and b/docs/articles/rejection_sampling_files/figure-html/reject_y_plot-1.png differ diff --git a/docs/articles/rejection_sampling_files/figure-html/reject_y_plot-2.png b/docs/articles/rejection_sampling_files/figure-html/reject_y_plot-2.png index 95efe32..277d850 100644 Binary files a/docs/articles/rejection_sampling_files/figure-html/reject_y_plot-2.png and b/docs/articles/rejection_sampling_files/figure-html/reject_y_plot-2.png differ diff --git a/docs/articles/rejection_sampling_files/figure-html/results1_plots-1.png b/docs/articles/rejection_sampling_files/figure-html/results1_plots-1.png index 5f9b528..b48b827 100644 Binary files a/docs/articles/rejection_sampling_files/figure-html/results1_plots-1.png and b/docs/articles/rejection_sampling_files/figure-html/results1_plots-1.png differ diff --git a/docs/articles/rejection_sampling_files/figure-html/results1_plots-2.png b/docs/articles/rejection_sampling_files/figure-html/results1_plots-2.png index c127d4d..651a3da 100644 Binary files a/docs/articles/rejection_sampling_files/figure-html/results1_plots-2.png and b/docs/articles/rejection_sampling_files/figure-html/results1_plots-2.png differ diff --git a/docs/articles/rejection_sampling_files/header-attrs-2.10/header-attrs.js b/docs/articles/rejection_sampling_files/header-attrs-2.10/header-attrs.js deleted file mode 100644 index dd57d92..0000000 --- a/docs/articles/rejection_sampling_files/header-attrs-2.10/header-attrs.js +++ /dev/null @@ -1,12 +0,0 @@ -// Pandoc 2.9 adds attributes on both header and div. We remove the former (to -// be compatible with the behavior of Pandoc < 2.8). 
-document.addEventListener('DOMContentLoaded', function(e) { - var hs = document.querySelectorAll("div.section[class*='level'] > :first-child"); - var i, h, a; - for (i = 0; i < hs.length; i++) { - h = hs[i]; - if (!/^h[1-6]$/i.test(h.tagName)) continue; // it should be a header h1-h6 - a = h.attributes; - while (a.length > 0) h.removeAttribute(a[0].name); - } -}); diff --git a/docs/articles/rejection_sampling_files/header-attrs-2.11/header-attrs.js b/docs/articles/rejection_sampling_files/header-attrs-2.11/header-attrs.js deleted file mode 100644 index dd57d92..0000000 --- a/docs/articles/rejection_sampling_files/header-attrs-2.11/header-attrs.js +++ /dev/null @@ -1,12 +0,0 @@ -// Pandoc 2.9 adds attributes on both header and div. We remove the former (to -// be compatible with the behavior of Pandoc < 2.8). -document.addEventListener('DOMContentLoaded', function(e) { - var hs = document.querySelectorAll("div.section[class*='level'] > :first-child"); - var i, h, a; - for (i = 0; i < hs.length; i++) { - h = hs[i]; - if (!/^h[1-6]$/i.test(h.tagName)) continue; // it should be a header h1-h6 - a = h.attributes; - while (a.length > 0) h.removeAttribute(a[0].name); - } -}); diff --git a/docs/articles/small_model_workflow.html b/docs/articles/small_model_workflow.html index a8ff3b0..07d5529 100644 --- a/docs/articles/small_model_workflow.html +++ b/docs/articles/small_model_workflow.html @@ -19,6 +19,8 @@ + +
+
-

Here we describe a complete process to iteratively build and validate the implementation of a non-trivial, but still relatively small model. This is not a full Bayesian Workflow, instead the process described here can be thought of as a subroutine in the full workflow: here we take a relatively precise description of a model as input and try to produce a Stan program that implements this model. Once we have a Stan program we trust, it is still necessary to validate its fit to actual data and other properties, which may trigger a need to change the model. At this point you may want to go back to simulations and make sure the modified model is implemented correctly.

-

The workflow described here focuses on small models. “Small” means that the model is relatively fast to fit and we don’t have to worry about computation too much. Once running ~100 fits of the model becomes too costly, there are additional tricks and considerations that we hope to delve into in a “Building a complex model” vignette (which currently doesn’t exist). Still many of the approaches here also apply to complex models (especially starting small and building each component separately), and with proper separation of the model into components, one can validate big chunks of Stan code while working with small models only.

+

Here we describe a complete process to iteratively build and validate the implementation of a non-trivial, but still relatively small model. This is not a full Bayesian Workflow, instead the process described here can be thought of as a subroutine in the full workflow: here we take a relatively precise description of a model as input and try to produce a Stan program that implements this model. Once we have a Stan program we trust, it is still necessary to validate its fit to actual data and other properties, which may trigger a need to change the model. At this point you may want to go back to simulations and make sure the modified model is implemented correctly.

+

The workflow described here focuses on small models. “Small” means that the model is relatively fast to fit and we don’t have to worry about computation too much. Once running ~100 fits of the model becomes too costly, there are additional tricks and considerations that we hope to delve into in a “Building a complex model” vignette (which currently doesn’t exist). Still many of the approaches here also apply to complex models (especially starting small and building smaller submodels separately), and with proper separation of the model into submodels, one can validate big chunks of Stan code while working with small models only.

We expect the reader to be familiar with basics of the package. If not, check out the “basic_usage” vignette.

-
-

-Our goal

+
+

Our goal +

The example we’ll investigate is building a two-component Poisson mixture, where the mixing ratio is allowed to vary with some predictors while the means of the components are the same for all observations. A somewhat contrived real-world situation where this could be a useful model: there are two subspecies of an animal that are hard to observe directly, but leave droppings (poop) behind that we can find. Further, we know the subspecies differ in the average number of droppings they leave at one place. So we can take the number of droppings as noisy information about which subspecies was present at a given location. We observe the number of droppings at multiple locations, record some environmental covariates about the locations (e.g. temperature, altitude), and want to learn something about the association between those covariates and the prevalence of either subspecies.

-
-

-Big picture

-

This model naturally decomposes into two components:

+
+

Big picture +

+

This model naturally decomposes into two submodels:

    -
  1. the mixture component where the mixing ratio is the same for all variables

  +
  1. the mixture submodel where the mixing ratio is the same for all observations

  2. a beta regression where we take covariates and make a prediction of a probability, assuming we (noisily) observe the probability.

-

It is good practice to start small and implement and validate each of those components separately and then put them together and validate the bigger model. This makes is substantially easier to locate bugs. You’ll notice that the process ends up involving a lot of steps, but the fact is that we still ignore all the completely invalid models I created while writing this vignette (typos, compile errors, dimension mismatches, …). Developing models you can trust is hard work. More experienced users can definitely make bigger steps at once, but we strongly discourage anyone from writing a big model in one go. My experience is that whenever I try to do this, the model breaks, is impossible to debug and then I end up breaking it down anyway.

+

It is good practice to start small and implement and validate each of those submodels separately and then put them together and validate the bigger model. This makes it substantially easier to locate bugs. You’ll notice that the process ends up involving a lot of steps, but the fact is that we still ignore all the completely invalid models I created while writing this vignette (typos, compile errors, dimension mismatches, …). Developing models you can trust is hard work. More experienced users can definitely make bigger steps at once, but we strongly discourage anyone from writing a big model in one go. My experience is that whenever I try to do this, the model breaks, is impossible to debug, and I end up breaking it down anyway.

Let’s setup and get our hands dirty:

-library(SBC)
+library(SBC)
+
+use_cmdstanr <- getOption("SBC.vignettes_cmdstanr", TRUE) # Set to false to use rstan instead
 
-use_cmdstanr <- TRUE # Set to false to use rstan instead
+if(use_cmdstanr) {
+  library(cmdstanr)
+} else {
+  library(rstan)
+  rstan_options(auto_write = TRUE)
+}
 
-# if(use_cmdstanr) {
-#   library(cmdstanr)
-# } else {
-#   library(rstan)
-# }
-library(cmdstanr)
-library(bayesplot)
-library(posterior)
+library(bayesplot)
+library(posterior)
 
-library(future)
-plan(multisession) 
+library(future)
+plan(multisession) 
 
-options(SBC.min_chunk_size = 5)
+options(SBC.min_chunk_size = 5)
 
 # Setup caching of results
-cache_dir <- "./small_model_worklow_SBC_cache"
-if(!dir.exists(cache_dir)) {
-  dir.create(cache_dir)
+if(use_cmdstanr) {
+  cache_dir <- "./_small_model_worklow_SBC_cache"
+} else {
+  cache_dir <- "./_small_model_worklow_rstan_SBC_cache"
+}
+if(!dir.exists(cache_dir)) {
+  dir.create(cache_dir)
 }
-
-

-Mixture component

-

There is a good guide to mixtures in the Stan user’s guide. Following the user’s guide would save us from a lot of mistakes, but for the sake of example, we will pretend we didn’t really read it - and we’ll see the problems can be discovered via simulations.

-

So this is our first try at implementing the mixture component:

+
+

Mixture submodel +

+

There is a good guide to mixtures in the Stan user’s guide. Following the user’s guide would save us from a lot of mistakes, but for the sake of example, we will pretend we didn’t really read it - and we’ll see the problems can be discovered via simulations.

+

So this is our first try at implementing the mixture submodel:

-cat(readLines("small_model_workflow/mixture_first.stan"), sep = "\n")
+cat(readLines("small_model_workflow/mixture_first.stan"), sep = "\n")
data {
   int<lower=0> N;
   int y[N];
@@ -197,31 +207,36 @@ 

target += normal_lpdf(mu2 | 3, 1); }

-model_first <- cmdstan_model("small_model_workflow/mixture_first.stan")
-backend_first <- SBC_backend_cmdstan_sample(model_first) 
+if(use_cmdstanr) {
+  model_first <- cmdstan_model("small_model_workflow/mixture_first.stan")
+  backend_first <- SBC_backend_cmdstan_sample(model_first)
+} else {
+  model_first <- stan_model("small_model_workflow/mixture_first.stan")
+  backend_first <- SBC_backend_rstan_sample(model_first)
+}

And this is our code to simulate data for this model:

 generator_func_first <- function(N) {
-  mu1 <- rnorm(1, 3, 1)
-  mu2 <- rnorm(1, 3, 1)
-  theta <- runif(1)
+  mu1 <- rnorm(1, 3, 1)
+  mu2 <- rnorm(1, 3, 1)
+  theta <- runif(1)
   
-  y <- numeric(N)
+  y <- numeric(N)
   for(n in 1:N) {
-    if(runif(1) < theta) {
-      y[n] <- rpois(1, exp(mu1))
+    if(runif(1) < theta) {
+      y[n] <- rpois(1, exp(mu1))
     } else {
-      y[n] <- rpois(1, exp(mu2))
+      y[n] <- rpois(1, exp(mu2))
     }
   }
   
-  list(
-    parameters = list(
+  list(
+    variables = list(
       mu1 = mu1,
       mu2 = mu2,
       theta = theta
     ),
-    generated = list(
+    generated = list(
       N = N,
       y = y
     )
@@ -229,37 +244,41 @@ 

} generator_first <- SBC_generator_function(generator_func_first, N = 50)

-

Let’s start with just a single dataset:

+

Let’s start with just a single simulation:

-set.seed(68455554)
+set.seed(68455554)
 datasets_first <- generate_datasets(generator_first, 1)
-results_first <- compute_results(datasets_first, backend_first, 
+results_first <- compute_SBC(datasets_first, backend_first, 
                     cache_mode = "results", 
-                    cache_location = file.path(cache_dir, "mixture_first"))
-
## Results loaded from cache file 'mixture_first'
-
##  - 1 (100%) fits had at least one Rhat > 1.01. Largest Rhat was 1.52.
-
##  - 1 (100%) fits had tail ESS undefined or less than half of the maximum rank, potentially skewing 
-## the rank statistics. The lowest tail ESS was NA.
-##  If the fits look good otherwise, increasing `thin_ranks` (via recompute_statistics) 
-## or number of posterior samples (by refitting) might help.
-
##  - 1 (100%) fits had divergent transitions. Maximum number of divergences was 9.
-
## Not all diagnostics are OK.
-## You can learn more by inspecting $default_diagnostics, $backend_diagnostics 
-## and/or investigating $outputs/$messages/$warnings for detailed output from the backend.
+ cache_location = file.path(cache_dir, "mixture_first"))
+
## Results loaded from cache file 'mixture_first'
+
##  - 1 (100%) fits had at least one Rhat > 1.01. Largest Rhat was 1.717.
+
##  - 1 (100%) fits had tail ESS undefined or less than half of the maximum rank, potentially skewing 
+## the rank statistics. The lowest tail ESS was 129.
+##  If the fits look good otherwise, increasing `thin_ranks` (via recompute_SBC_statistics) 
+## or number of posterior draws (by refitting) might help.
+
##  - 1 (100%) fits had divergent transitions. Maximum number of divergences was 5.
+
## Not all diagnostics are OK.
+## You can learn more by inspecting $default_diagnostics, $backend_diagnostics 
+## and/or investigating $outputs/$messages/$warnings for detailed output from the backend.

Oh, we have convergence problems, let us examine the pairs plots

-mcmc_pairs(results_first$fits[[1]]$draws())
+if(use_cmdstanr) {
+  mcmc_pairs(results_first$fits[[1]]$draws())
+} else {
+  mcmc_pairs(results_first$fits[[1]])
+}

One thing that stands out is that either mu1 is tightly determined and mu2 is allowed the full prior range or the other way around. We also don’t learn anything about theta.

This might be puzzling but relates to incorrect usage of log_mix. The thing is that poisson_log_lpmf(y | mu1) returns a single number - the total log likelihood of all elements of y given mu1. We are thus building a mixture where either all observations come from the first component or all come from the second component. To implement a mixture where each observation is allowed to come from a different component, we need to loop over observations and make a separate log_mix call for each.
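The difference is easy to see outside of Stan. Below is a small R sketch (with arbitrary made-up values) contrasting the two quantities: mixing the total likelihood of the whole vector y versus mixing the likelihood of each observation separately.

y <- c(0, 1, 20, 25)
mu1 <- 0; mu2 <- 3; theta <- 0.5

# What the model above effectively computes: either *all* observations come
# from component 1 or *all* come from component 2
loglik_all_or_nothing <-
  log(theta * prod(dpois(y, exp(mu1))) + (1 - theta) * prod(dpois(y, exp(mu2))))

# What we actually want: each observation can come from a different component
loglik_per_observation <-
  sum(log(theta * dpois(y, exp(mu1)) + (1 - theta) * dpois(y, exp(mu2))))

loglik_all_or_nothing
loglik_per_observation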

-

More details on the mathematical background are explained in the “Vectorizing mixtures” section of Stan User’s guide.

-
-

-Fixing mixture

+

More details on the mathematical background are explained in the “Vectorizing mixtures” section of Stan User’s guide.

+
+

Fixing mixture +

So we’ve fixed the log_mix problem and this is our new model:

-cat(readLines("small_model_workflow/mixture_fixed_log_mix.stan"), sep = "\n")
+cat(readLines("small_model_workflow/mixture_fixed_log_mix.stan"), sep = "\n")
data {
   int<lower=0> N;
   int y[N];
@@ -281,58 +300,76 @@ 

target += normal_lpdf(mu2 | 3, 1); }

-model_fixed_log_mix <- cmdstan_model("small_model_workflow/mixture_fixed_log_mix.stan")
-backend_fixed_log_mix <- SBC_backend_cmdstan_sample(model_fixed_log_mix)
-

So let’s try once again with the same single dataset:

+if(use_cmdstanr) {
+  model_fixed_log_mix <- cmdstan_model("small_model_workflow/mixture_fixed_log_mix.stan")
+  backend_fixed_log_mix <- SBC_backend_cmdstan_sample(model_fixed_log_mix)
+} else {
+  model_fixed_log_mix <- stan_model("small_model_workflow/mixture_fixed_log_mix.stan")
+  backend_fixed_log_mix <- SBC_backend_rstan_sample(model_fixed_log_mix)
+
+}
+

So let’s try once again with the same single simulation:

-results_fixed_log_mix <- compute_results(datasets_first, backend_fixed_log_mix, 
+results_fixed_log_mix <- compute_SBC(datasets_first, backend_fixed_log_mix, 
                     cache_mode = "results", 
-                    cache_location = file.path(cache_dir, "mixture_fixed_log_mix"))
-
## Results loaded from cache file 'mixture_fixed_log_mix'
+ cache_location = file.path(cache_dir, "mixture_fixed_log_mix"))
+
## Results loaded from cache file 'mixture_fixed_log_mix'
+
##  - 1 (100%) fits had at least one Rhat > 1.01. Largest Rhat was 1.735.
+
##  - 1 (100%) fits had tail ESS undefined or less than half of the maximum rank, potentially skewing 
+## the rank statistics. The lowest tail ESS was 142.
+##  If the fits look good otherwise, increasing `thin_ranks` (via recompute_SBC_statistics) 
+## or number of posterior draws (by refitting) might help.
+
## Not all diagnostics are OK.
+## You can learn more by inspecting $default_diagnostics, $backend_diagnostics 
+## and/or investigating $outputs/$messages/$warnings for detailed output from the backend.

No warnings this time. We look at the stats:

-
+
 results_fixed_log_mix$stats
-
## # A tibble: 3 x 15
-##   dataset_id parameter simulated_value  rank z_score   mean median     sd    mad
-##        <int> <chr>               <dbl> <dbl>   <dbl>  <dbl>  <dbl>  <dbl>  <dbl>
-## 1          1 mu1                3.13     392   2.09  2.56   2.58   0.269  0.261 
-## 2          1 mu2                4.27     307   0.823 4.26   4.26   0.0171 0.0172
-## 3          1 theta              0.0528   308   0.581 0.0380 0.0331 0.0255 0.0228
-## # ... with 6 more variables: q5 <dbl>, q95 <dbl>, rhat <dbl>, ess_bulk <dbl>,
-## #   ess_tail <dbl>, max_rank <int>
-

We see nothing obviously wrong, the posterior means are relatively close to simulated values (as summarised by the z-scores) - no parameter is clearly ridiculously misfit. So let’s run a few more iterations.

-
-set.seed(8314566)
+
## # A tibble: 3 x 15
+##   sim_id variable simulated_value  rank z_score  mean median    sd   mad     q5
+##    <int> <chr>              <dbl> <dbl>   <dbl> <dbl>  <dbl> <dbl> <dbl>  <dbl>
+## 1      1 mu1               3.13     195  -0.333 3.41   3.81  0.864 0.744 2.23  
+## 2      1 mu2               4.27     364   0.993 3.41   3.71  0.862 0.867 2.24  
+## 3      1 theta             0.0528   153  -0.966 0.500  0.511 0.462 0.692 0.0100
+## # ... with 5 more variables: q95 <dbl>, rhat <dbl>, ess_bulk <dbl>,
+## #   ess_tail <dbl>, max_rank <int>
+
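As a quick reading aid: the z_score column appears to be (simulated_value - posterior mean) / posterior sd, so we can roughly reproduce, say, the mu2 row of the table above from its own summaries (the printed values are rounded, hence the small discrepancy):

(4.27 - 3.41) / 0.862  # roughly 1, matching the reported z_score of 0.993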

We see nothing obviously wrong, the posterior means are relatively close to simulated values (as summarised by the z-scores) - no variable is clearly ridiculously misfit. So let’s run a few more iterations.

+
+set.seed(8314566)
 datasets_first_10 <- generate_datasets(generator_first, 10)
-
-results_fixed_log_mix_2 <- compute_results(datasets_first_10, backend_fixed_log_mix, 
+
+results_fixed_log_mix_2 <- compute_SBC(datasets_first_10, backend_fixed_log_mix, 
                     cache_mode = "results", 
-                    cache_location = file.path(cache_dir, "mixture_fixed_log_mix_2"))
-
## Results loaded from cache file 'mixture_fixed_log_mix_2'
-
##  - 10 (100%) fits had at least one Rhat > 1.01. Largest Rhat was 1.736.
-
##  - 9 (90%) fits had tail ESS undefined or less than half of the maximum rank, potentially skewing 
-## the rank statistics. The lowest tail ESS was NA.
-##  If the fits look good otherwise, increasing `thin_ranks` (via recompute_statistics) 
-## or number of posterior samples (by refitting) might help.
-
## Not all diagnostics are OK.
-## You can learn more by inspecting $default_diagnostics, $backend_diagnostics 
-## and/or investigating $outputs/$messages/$warnings for detailed output from the backend.
+ cache_location = file.path(cache_dir, "mixture_fixed_log_mix_2"))
+
## Results loaded from cache file 'mixture_fixed_log_mix_2'
+
##  - 10 (100%) fits had at least one Rhat > 1.01. Largest Rhat was 1.735.
+
##  - 8 (80%) fits had tail ESS undefined or less than half of the maximum rank, potentially skewing 
+## the rank statistics. The lowest tail ESS was 28.
+##  If the fits look good otherwise, increasing `thin_ranks` (via recompute_SBC_statistics) 
+## or number of posterior draws (by refitting) might help.
+
## Not all diagnostics are OK.
+## You can learn more by inspecting $default_diagnostics, $backend_diagnostics 
+## and/or investigating $outputs/$messages/$warnings for detailed output from the backend.

So there are some problems - we have quite a bunch of high R-hat and low ESS values. This is the distribution of all rhats:

-
-hist(results_fixed_log_mix_2$stats$rhat)
+
+hist(results_fixed_log_mix_2$stats$rhat)

Let’s examine a single pairs plot:

-
-mcmc_pairs(results_fixed_log_mix_2$fits[[1]]$draws())
+
+if(use_cmdstanr) {
+  mcmc_pairs(results_fixed_log_mix_2$fits[[1]]$draws())
+} else {
+  mcmc_pairs(results_fixed_log_mix_2$fits[[1]])  
+}

-

We clearly see two modes in the posterior. And upon reflection, we can see why: swapping mu1 with mu2 while also changing theta for 1 - theta gives exactly the same likelihood - because the ordering does not matter. A more detailed explanation of this type of problem is at https://betanalpha.github.io/assets/case_studies/identifying_mixture_models.html

+

We clearly see two modes in the posterior. And upon reflection, we can see why: swapping mu1 with mu2 while also changing theta for 1 - theta gives exactly the same likelihood - because the ordering does not matter. A more detailed explanation of this type of problem is at https://betanalpha.github.io/assets/case_studies/identifying_mixture_models.html
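We can verify this symmetry numerically with a small R sketch (arbitrary values, just to illustrate the point):

mixture_loglik <- function(mu1, mu2, theta, y) {
  sum(log(theta * dpois(y, exp(mu1)) + (1 - theta) * dpois(y, exp(mu2))))
}
y_check <- rpois(20, 10)
mixture_loglik(2, 3, 0.3, y_check)
mixture_loglik(3, 2, 0.7, y_check)  # identical - the component labels carry no information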

-
-

-Fixing ordering

+
+

Fixing ordering +

We can easily fix the ordering of the mus by using the ordered built-in type.

-
-cat(readLines("small_model_workflow/mixture_fixed_ordered.stan"), sep = "\n")
+
+cat(readLines("small_model_workflow/mixture_fixed_ordered.stan"), sep = "\n")
data {
   int<lower=0> N;
   int y[N];
@@ -351,33 +388,38 @@ 

} target += normal_lpdf(mu | 3, 1); }

-
-model_fixed_ordered <- cmdstan_model("small_model_workflow/mixture_fixed_ordered.stan")
-backend_fixed_ordered <- SBC_backend_cmdstan_sample(model_fixed_ordered) 
+
+if(use_cmdstanr) {
+  model_fixed_ordered <- cmdstan_model("small_model_workflow/mixture_fixed_ordered.stan")
+  backend_fixed_ordered <- SBC_backend_cmdstan_sample(model_fixed_ordered) 
+} else {
+  model_fixed_ordered <- stan_model("small_model_workflow/mixture_fixed_ordered.stan")
+  backend_fixed_ordered <- SBC_backend_rstan_sample(model_fixed_ordered)   
+}

We also need to update the generator to match the new names and the ordering constraint:

-
+
 generator_func_ordered <- function(N) {
   # If the priors for all components of an ordered vector are the same
   # then just sorting the result of a generator is enough to create
-  # a valid sample from the ordered vector
-  mu <- sort(rnorm(2, 3, 1)) 
-  theta <- runif(1)
+  # a valid draw from the ordered vector prior
+  mu <- sort(rnorm(2, 3, 1)) 
+  theta <- runif(1)
   
-  y <- numeric(N)
+  y <- numeric(N)
   for(n in 1:N) {
-    if(runif(1) < theta) {
-      y[n] <- rpois(1, exp(mu[1]))
+    if(runif(1) < theta) {
+      y[n] <- rpois(1, exp(mu[1]))
     } else {
-      y[n] <- rpois(1, exp(mu[2]))
+      y[n] <- rpois(1, exp(mu[2]))
     }
   }
   
-  list(
-    parameters = list(
+  list(
+    variables = list(
       mu = mu,
       theta = theta
     ),
-    generated = list(
+    generated = list(
       N = N,
       y = y
     )
@@ -385,189 +427,183 @@ 

} generator_ordered <- SBC_generator_function(generator_func_ordered, N = 50)

-

We are kind of confident (and the model fits quickly), so we’ll already start with 10 datasets.

-
-set.seed(3785432)
+

We are kind of confident (and the model fits quickly), so we’ll already start with 10 simulations.

+
+set.seed(3785432)
 datasets_ordered_10 <- generate_datasets(generator_ordered, 10)
-
-results_fixed_ordered <- compute_results(datasets_ordered_10, backend_fixed_ordered, 
+
+results_fixed_ordered <- compute_SBC(datasets_ordered_10, backend_fixed_ordered, 
                     cache_mode = "results", 
-                    cache_location = file.path(cache_dir, "mixture_fixed_ordered"))
-
## Results loaded from cache file 'mixture_fixed_ordered'
-
##  - 2 (20%) fits had at least one Rhat > 1.01. Largest Rhat was 1.207.
-
##  - 2 (20%) fits had tail ESS undefined or less than half of the maximum rank, potentially skewing 
-## the rank statistics. The lowest tail ESS was NA.
-##  If the fits look good otherwise, increasing `thin_ranks` (via recompute_statistics) 
-## or number of posterior samples (by refitting) might help.
-
##  - 2 (20%) fits had divergent transitions. Maximum number of divergences was 145.
-
## Not all diagnostics are OK.
-## You can learn more by inspecting $default_diagnostics, $backend_diagnostics 
-## and/or investigating $outputs/$messages/$warnings for detailed output from the backend.
-

Now some fits still produce problematic Rhats or divergent transitions, let’s browse the $backend_diagnostics (which contain Stan-specific diagnostic values) to see which datasets are causing problems:

-
+                    cache_location = file.path(cache_dir, "mixture_fixed_ordered"))
+
## Results loaded from cache file 'mixture_fixed_ordered'
+
##  - 2 (20%) fits had at least one Rhat > 1.01. Largest Rhat was 1.207.
+
##  - 2 (20%) fits had tail ESS undefined or less than half of the maximum rank, potentially skewing 
+## the rank statistics. The lowest tail ESS was 30.
+##  If the fits look good otherwise, increasing `thin_ranks` (via recompute_SBC_statistics) 
+## or number of posterior draws (by refitting) might help.
+
##  - 2 (20%) fits had divergent transitions. Maximum number of divergences was 145.
+
## Not all diagnostics are OK.
+## You can learn more by inspecting $default_diagnostics, $backend_diagnostics 
+## and/or investigating $outputs/$messages/$warnings for detailed output from the backend.
+

Now some fits still produce problematic Rhats or divergent transitions, let’s browse the $backend_diagnostics (which contain Stan-specific diagnostic values) to see which simulations are causing problems:

+
 results_fixed_ordered$backend_diagnostics
-
##    dataset_id max_chain_time n_failed_chains n_divergent n_max_treedepth
-## 1           1          1.036               0           2               0
-## 2           2          1.354               0         145               0
-## 3           3          0.910               0           0               0
-## 4           4          0.774               0           0               0
-## 5           5          3.801               0           0               0
-## 6           6          1.233               0           0               0
-## 7           7          0.890               0           0               0
-## 8           8          0.815               0           0               0
-## 9           9          0.646               0           0               0
-## 10         10          0.609               0           0               0
-##    n_rejects
-## 1          0
-## 2          0
-## 3          0
-## 4          0
-## 5          0
-## 6          0
-## 7          0
-## 8          0
-## 9          0
-## 10         0
+
##    sim_id max_chain_time n_failed_chains n_divergent n_max_treedepth n_rejects
+## 1       1          0.809               0           7               0         0
+## 2       2          1.143               0         145               0         0
+## 3       3          0.788               0           0               0         0
+## 4       4          0.694               0           0               0         0
+## 5       5          3.400               0           0               0         0
+## 6       6          1.064               0           0               0         0
+## 7       7          0.792               0           0               0         0
+## 8       8          0.667               0           0               0         0
+## 9       9          0.578               0           0               0         0
+## 10     10          0.536               0           0               0         0

One of the fits has quite a lot of divergent transitions. Let’s look at the pairs plot for the model:

-
+
 problematic_fit_id <- 2
 problematic_fit <- results_fixed_ordered$fits[[problematic_fit_id]]
-mcmc_pairs(problematic_fit$draws(), np = nuts_params(problematic_fit))
+if(use_cmdstanr) {
+  mcmc_pairs(problematic_fit$draws(), np = nuts_params(problematic_fit))
+} else {
+  mcmc_pairs(problematic_fit, np = nuts_params(problematic_fit))
+}

There is a lot of ugly stuff going on. Notably, one can notice that the posterior of theta is bimodal, preferring either almost 0 or almost 1 - and when that happens, the mean of one of the components is almost unconstrained. Why does that happen? The key to the answer is in the simulated values for the component means:

-
-subset_draws(datasets_ordered_10$parameters, draw = problematic_fit_id)
-
## # A draws_matrix: 1 iterations, 1 chains, and 3 variables
-##     variable
-## draw mu[1] mu[2] theta
-##    2   3.1   3.1  0.65
-

We were unlucky enough to simulate a dataset where both components have almost the same mean and thus we are actually looking at a dataset that is not really a mixture. Mixture models can misbehave badly in such cases (see once again the case study by Mike Betancourt for a bit more detailed dive into this particular problem).

+
+subset_draws(datasets_ordered_10$variables, draw = problematic_fit_id)
+
## # A draws_matrix: 1 iterations, 1 chains, and 3 variables
+##     variable
+## draw mu[1] mu[2] theta
+##    2   3.1   3.1  0.65
+

We were unlucky enough to simulate data where both components have almost the same mean and thus we are actually looking at data that is not really a mixture. Mixture models can misbehave badly in such cases (see once again the case study by Mike Betancourt for a bit more detailed dive into this particular problem).
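A small sketch of why such data is troublesome: when the two component means coincide exactly, the likelihood does not depend on theta at all, so the data cannot inform it (arbitrary values again):

y_check <- rpois(50, exp(3.1))
loglik_given_theta <- function(theta) {
  sum(log(theta * dpois(y_check, exp(3.1)) + (1 - theta) * dpois(y_check, exp(3.1))))
}
sapply(c(0.1, 0.5, 0.9), loglik_given_theta)  # all identical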

-
-

-Fixing degenerate components?

+
+

Fixing degenerate components? +

What to do about this? Fixing the model to handle such cases gracefully is hard. But the problem is basically our prior - we want to express that (since we are fitting a two component model), we don’t expect the means to be too similar. So if we can change our simulation to avoid this, we’ll be able to proceed with SBC. If such a pattern appeared in real data, we would still have a problem, but we would notice thanks to the diagnostics.

-

This can definitely be done. But another way is to just ignore the datasets that had divergences for SBC calculations. It turns out that if we remove datasets in a way that only depends on the observed data (and not on unobserved parameters), the SBC identity is preserved and we can use SBC without modifications. The resulting check is however telling us something only for datasets that were not rejected. In this case this is not a big issue: if a fit had divergent transitions, we would not trust it anyway, so removing fits with divergent transitions is not such a big deal.

+

This can definitely be done. But another way is to just ignore the simulations that had divergences for SBC calculations. It turns out that if we remove simulations in a way that only depends on the observed data (and not on unobserved variables), the SBC identity is preserved and we can use SBC without modifications. The resulting check is however telling us something only for data that were not rejected. In this case this is not a big issue: if a fit had divergent transitions, we would not trust it anyway, so removing fits with divergent transitions is not such a big deal.

For more details see the rejection_sampling vignette.

So let us subset the results to avoid divergences:

-
-dataset_ids_to_keep <- 
-  results_fixed_ordered$backend_diagnostics$dataset_id[
+
+sim_ids_to_keep <- 
+  results_fixed_ordered$backend_diagnostics$sim_id[
     results_fixed_ordered$backend_diagnostics$n_divergent == 0]
 
 # Equivalent tidy version if you prefer
-# dataset_ids_to_keep <- results_fixed_ordered$backend_diagnostics %>% 
+# sim_ids_to_keep <- results_fixed_ordered$backend_diagnostics %>% 
 #   dplyr::filter(n_divergent == 0) %>%
-#   dplyr::pull(dataset_id)
-
-
-results_fixed_ordered_subset <- results_fixed_ordered[dataset_ids_to_keep]
-summary(results_fixed_ordered_subset)
-
## SBC_results with 8 total fits.
-##  - No fits had errors.
-##  - No fits gave warnings.
-##  - No fits had Rhat > 1.01.
-##  - All fits had tail ESS > half of the maximum rank.
-##  - The lowest bulk ESS was 1073
-##  - No fits had failed chains.
-##  - No fits had divergent transitions.
-##  - No fits had iterations that saturated max treedepth.
-##  - No fits had steps rejected.
-##  - Maximum time per chain was 3.801 sec.
+#   dplyr::pull(sim_id)
+
+
+results_fixed_ordered_subset <- results_fixed_ordered[sim_ids_to_keep]
+summary(results_fixed_ordered_subset)
+
## SBC_results with 8 total fits.
+##  - No fits had errors.
+##  - No fits gave warnings.
+##  - No fits had Rhat > 1.01.
+##  - All fits had tail ESS > half of the maximum rank.
+##  - The lowest bulk ESS was 1141
+##  - No fits had failed chains.
+##  - No fits had divergent transitions.
+##  - No fits had iterations that saturated max treedepth.
+##  - No fits had steps rejected.
+##  - Maximum time per chain was 3.4 sec.

This gives us no obvious problems.

-
+
 plot_rank_hist(results_fixed_ordered_subset)

-
-plot_ecdf_diff(results_fixed_ordered_subset)
+
+plot_ecdf_diff(results_fixed_ordered_subset)

-

Since we now have only 8 simulations, it is not surprising that we are still left with a huge uncertainty about the actual coverage of our posterior intervals - we can see that in a plot (showing observed coverage of central posterior intervals of varying width and the associated uncertainty):

-
+

Since we now have only 8 simulations, it is not surprising that we are still left with a huge uncertainty about the actual coverage of our posterior intervals - we can see that in a plot:

+
 plot_coverage(results_fixed_ordered_subset)

+

The coverage plot shows the observed coverage of central posterior intervals of varying width and the associated uncertainty (black + grey), the blue line represents perfect calibration.

Or investigate numerically.

-
-coverage <- empirical_coverage(results_fixed_ordered_subset$stats, width = c(0.5,0.9,0.95))
+
+coverage <- empirical_coverage(results_fixed_ordered_subset$stats, width = c(0.5,0.9,0.95))
 coverage
-
## # A tibble: 9 x 6
-##   parameter width width_represented ci_low estimate ci_high
-##   <chr>     <dbl>             <dbl>  <dbl>    <dbl>   <dbl>
-## 1 mu[1]      0.5              0.499  0.299    0.625   0.863
-## 2 mu[1]      0.9              0.900  0.518    0.875   0.972
-## 3 mu[1]      0.95             0.950  0.518    0.875   0.972
-## 4 mu[2]      0.5              0.499  0.400    0.75    0.925
-## 5 mu[2]      0.9              0.900  0.518    0.875   0.972
-## 6 mu[2]      0.95             0.950  0.518    0.875   0.972
-## 7 theta      0.5              0.499  0.137    0.375   0.701
-## 8 theta      0.9              0.900  0.400    0.75    0.925
-## 9 theta      0.95             0.950  0.400    0.75    0.925
+
## # A tibble: 9 x 6
+##   variable width width_represented ci_low estimate ci_high
+##   <chr>    <dbl>             <dbl>  <dbl>    <dbl>   <dbl>
+## 1 mu[1]     0.5               0.5   0.299    0.625   0.863
+## 2 mu[1]     0.9               0.9   0.518    0.875   0.972
+## 3 mu[1]     0.95              0.95  0.518    0.875   0.972
+## 4 mu[2]     0.5               0.5   0.400    0.75    0.925
+## 5 mu[2]     0.9               0.9   0.518    0.875   0.972
+## 6 mu[2]     0.95              0.95  0.518    0.875   0.972
+## 7 theta     0.5               0.5   0.137    0.375   0.701
+## 8 theta     0.9               0.9   0.400    0.75    0.925
+## 9 theta     0.95              0.95  0.400    0.75    0.925

We can clearly see that, while there are no terrible errors, quite a big miscalibration is still consistent with the SBC results so far - for example, the 90% posterior interval for theta could (as far as we know) contain 40% - 93% of the true values. That’s not very reassuring.
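For intuition, the observed coverage simply counts how often the simulated value fell inside the interval; roughly, the simulated value lies inside the central 90% posterior interval whenever its normalized rank is between 0.05 and 0.95. A conceptual sketch for theta (this is not necessarily the exact computation empirical_coverage performs):

theta_stats <- results_fixed_ordered_subset$stats[
  results_fixed_ordered_subset$stats$variable == "theta", ]
u <- theta_stats$rank / theta_stats$max_rank
mean(u > 0.05 & u < 0.95)  # observed coverage of the central 90% interval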

So we can run for more iterations - to reduce memory consumption, we set keep_fits = FALSE. You generally don’t want to do this unless you are really short on memory, as it makes you unable to inspect any problems in your fits:

-
-set.seed(54987622)
+
+set.seed(54987622)
 datasets_ordered_100 <- generate_datasets(generator_ordered, 100)
-results_fixed_ordered_100 <- compute_results(datasets_ordered_100, backend_fixed_ordered, 
+results_fixed_ordered_100 <- compute_SBC(datasets_ordered_100, backend_fixed_ordered, 
                     keep_fits = FALSE, cache_mode = "results", 
-                    cache_location = file.path(cache_dir, "mixture_fixed_ordered_100"))
-
## Results loaded from cache file 'mixture_fixed_ordered_100'
-
##  - 22 (22%) fits had at least one Rhat > 1.01. Largest Rhat was 1.354.
-
##  - 25 (25%) fits had tail ESS undefined or less than half of the maximum rank, potentially skewing 
-## the rank statistics. The lowest tail ESS was NA.
-##  If the fits look good otherwise, increasing `thin_ranks` (via recompute_statistics) 
-## or number of posterior samples (by refitting) might help.
-
##  - 28 (28%) fits had divergent transitions. Maximum number of divergences was 182.
-
## Not all diagnostics are OK.
-## You can learn more by inspecting $default_diagnostics, $backend_diagnostics 
-## and/or investigating $outputs/$messages/$warnings for detailed output from the backend.
+ cache_location = file.path(cache_dir, "mixture_fixed_ordered_100"))
+
## Results loaded from cache file 'mixture_fixed_ordered_100'
+
##  - 20 (20%) fits had at least one Rhat > 1.01. Largest Rhat was 1.32.
+
##  - 20 (20%) fits had tail ESS undefined or less than half of the maximum rank, potentially skewing 
+## the rank statistics. The lowest tail ESS was 11.
+##  If the fits look good otherwise, increasing `thin_ranks` (via recompute_SBC_statistics) 
+## or number of posterior draws (by refitting) might help.
+
##  - 28 (28%) fits had divergent transitions. Maximum number of divergences was 127.
+
## Not all diagnostics are OK.
+## You can learn more by inspecting $default_diagnostics, $backend_diagnostics 
+## and/or investigating $outputs/$messages/$warnings for detailed output from the backend.

Once again we subset to keep only non-divergent fits - this also removes all the problematic Rhats and ESS.

-
-dataset_ids_to_keep <- 
-  results_fixed_ordered_100$backend_diagnostics$dataset_id[
+
+sim_ids_to_keep <- 
+  results_fixed_ordered_100$backend_diagnostics$sim_id[
     results_fixed_ordered_100$backend_diagnostics$n_divergent == 0]
 
 # Equivalent tidy version
-# dataset_ids_to_keep <- results_fixed_ordered_100$backend_diagnostics %>% 
+# sim_ids_to_keep <- results_fixed_ordered_100$backend_diagnostics %>% 
 #   dplyr::filter(n_divergent == 0) %>%
-#   dplyr::pull(dataset_id)
-
-
-results_fixed_ordered_100_subset <- results_fixed_ordered_100[dataset_ids_to_keep]
-summary(results_fixed_ordered_100_subset)
-
## SBC_results with 72 total fits.
-##  - No fits had errors.
-##  - No fits gave warnings.
-##  - No fits had Rhat > 1.01.
-##  - All fits had tail ESS > half of the maximum rank.
-##  - The lowest bulk ESS was 507
-##  - No fits had failed chains.
-##  - No fits had divergent transitions.
-##  - No fits had iterations that saturated max treedepth.
-##  - No fits had steps rejected.
-##  - Maximum time per chain was 4.572 sec.
+#   dplyr::pull(sim_id)
+
+
+results_fixed_ordered_100_subset <- results_fixed_ordered_100[sim_ids_to_keep]
+summary(results_fixed_ordered_100_subset)
+
## SBC_results with 72 total fits.
+##  - No fits had errors.
+##  - No fits gave warnings.
+##  - No fits had Rhat > 1.01.
+##  - All fits had tail ESS > half of the maximum rank.
+##  - The lowest bulk ESS was 507
+##  - No fits had failed chains.
+##  - No fits had divergent transitions.
+##  - No fits had iterations that saturated max treedepth.
+##  - No fits had steps rejected.
+##  - Maximum time per chain was 4.249 sec.

And we can use bind_results to combine the new results with the previous fits to not waste our computational effort.

-
+
 results_fixed_ordered_combined <- bind_results(
   results_fixed_ordered_subset, results_fixed_ordered_100_subset)
 
 plot_rank_hist(results_fixed_ordered_combined)

-
-plot_ecdf_diff(results_fixed_ordered_combined)
+
+plot_ecdf_diff(results_fixed_ordered_combined)

Seems fairly well within the expected bounds. We could definitely run more iterations if we wanted to have a more strict check, but for now, we are happy and the remaining uncertainty about the coverage of our posterior intervals is no longer huge, so it is highly unlikely there is some big bug lurking down there. While we see a potential problem where the coverage for mu[1] and mu[2] is no longer consistent with perfect calibration, the ecdf_diff plot takes precedence as the uncertainty in the coverage plot is only approximate and we thus cannot take it too seriously (see help("empirical_coverage") for some more details).

-
+
 plot_coverage(results_fixed_ordered_combined)

Note: it turns out that extending the model to more components becomes somewhat tricky as the model can become sensitive to initialization. Also the problems with data that can be explained by fewer components than the model assumes become more prevalent.

-
-

-Beta regression component

-

Let’s move to the beta regression component of our model. After spending a bunch of time implementing this, I realized, that maybe treating this as a logistic regression component would have been wiser (and sufficient). But I am gonna keep it in - it just demonstrates that a real workflow can be messy and let’s us show some additional classes of problems and how they manifest in SBC.

+
+

Beta regression submodel +

+

Let’s move to the beta regression submodel of our model. After spending a bunch of time implementing this, I realized that maybe treating this as a logistic regression submodel would have been wiser (and sufficient). But I am going to keep it in - it just demonstrates that a real workflow can be messy and lets us show some additional classes of problems and how they manifest in SBC.

Checking the wiki page for Beta distribution, we notice that it has two parameters, both bounded to be positive. So our first attempt at beta regression just creates two linear predictors - one for each parameter of the distribution. We then exponentiate the predictors to make them positive and we have a model:

-
-cat(readLines("small_model_workflow/beta_first.stan"), sep = "\n")
+
+cat(readLines("small_model_workflow/beta_first.stan"), sep = "\n")
data {
   int<lower=0> N_obs;
   vector<lower=0, upper=1>[N_obs] y;
@@ -585,39 +621,44 @@ 

target += beta_lpdf(y | exp(linpred[1,]), exp(linpred[2,])); target += normal_lpdf(to_vector(beta) | 0, 1); }

-
-model_beta_first <- cmdstan_model("small_model_workflow/beta_first.stan")
-backend_beta_first <- SBC_backend_cmdstan_sample(model_beta_first) 
+
+if(use_cmdstanr) {
+  model_beta_first <- cmdstan_model("small_model_workflow/beta_first.stan")
+  backend_beta_first <- SBC_backend_cmdstan_sample(model_beta_first) 
+} else {
+  model_beta_first <- stan_model("small_model_workflow/beta_first.stan")
+  backend_beta_first <- SBC_backend_rstan_sample(model_beta_first)   
+}

We also write a matching generator (microoptimization tip: I usually write Stan models first so that I can work on the generator code while the Stan model compiles):

-
+
 generator_func_beta_first <- function(N_obs, N_predictors) {
   repeat {
-    beta <- matrix(rnorm(N_predictors * 2, 0, 1), nrow = 2, ncol = N_predictors)
+    beta <- matrix(rnorm(N_predictors * 2, 0, 1), nrow = 2, ncol = N_predictors)
   
-    x <- matrix(rnorm(N_predictors * N_obs, 0, 1), nrow = N_predictors, ncol = N_obs)
+    x <- matrix(rnorm(N_predictors * N_obs, 0, 1), nrow = N_predictors, ncol = N_obs)
     x[1, ] <- 1 # Intercept
   
-    y <- array(NA_real_, N_obs)
+    y <- array(NA_real_, N_obs)
       
     for(n in 1:N_obs) {
-      linpred <- rep(0, 2)
+      linpred <- rep(0, 2)
       for(c in 1:2) {
         for(p in 1:N_predictors) {
           linpred[c] <- linpred[c] + x[p, n] * beta[c, p]
         }
       }
-      y[n] <- rbeta(1, exp(linpred[1]), exp(linpred[2]))
+      y[n] <- rbeta(1, exp(linpred[1]), exp(linpred[2]))
     }
-    if(all(y > 1e-7) && all(y < 1 - 1e-7)) {
+    if(all(y < 1 - 1e-12)) {
       break;
     }
   }
     
-  list(
-    parameters = list(
+  list(
+    variables = list(
       beta = beta
     ),
-    generated = list(
+    generated = list(
       N_obs = N_obs,
       N_predictors = N_predictors,
       y = y,
@@ -627,48 +668,58 @@ 

} generator_beta_first <- SBC_generator_function(generator_func_beta_first, N_obs = 50, N_predictors = 3)

-

We’ll start with 10 datasets once again.

-
-set.seed(3325488)
+

One thing to note is that we add a rejection sampling step - we repeatedly generate simulations until we find one without y values very close to 1. Such values can be problematic, as they can be rounded to 1 when the data for Stan is written to disk, and an exact 1 is impossible under the Beta likelihood, so the model will fail. Rejections due to this criterion are quite rare and, in fact, do not threaten the validity of the SBC procedure (at least to the extent our real data also don’t contain such extreme values) - for more details see the rejection_sampling vignette.
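To see why a y value of exactly 1 is problematic for the Beta likelihood, a quick check suffices (arbitrary shape values above 1):

dbeta(1, shape1 = 2, shape2 = 2)              # density is exactly 0 ...
dbeta(1, shape1 = 2, shape2 = 2, log = TRUE)  # ... so the log density is -Inf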

+

We’ll start with 10 simulations once again.

+
+set.seed(3325488)
 datasets_beta_first <- generate_datasets(generator_beta_first, 10)
-
-results_beta_first_10 <- compute_results(datasets_beta_first, backend_beta_first, 
+
+results_beta_first_10 <- compute_SBC(datasets_beta_first, backend_beta_first, 
                     cache_mode = "results", 
-                    cache_location = file.path(cache_dir, "beta_first_10"))
-
## Results loaded from cache file 'beta_first_10'
-
##  - 1 (10%) fits had divergent transitions. Maximum number of divergences was 6.
-
##  - 10 (100%) fits had some steps rejected. Maximum number of rejections was 22.
-
## Not all diagnostics are OK.
-## You can learn more by inspecting $default_diagnostics, $backend_diagnostics 
-## and/or investigating $outputs/$messages/$warnings for detailed output from the backend.
+ cache_location = file.path(cache_dir, "beta_first_10"))
+
## Results loaded from cache file 'beta_first_10'
+
##  - 1 (10%) fits had divergent transitions. Maximum number of divergences was 7.
+
##  - 10 (100%) fits had some steps rejected. Maximum number of rejections was 26.
+
## Not all diagnostics are OK.
+## You can learn more by inspecting $default_diagnostics, $backend_diagnostics 
+## and/or investigating $outputs/$messages/$warnings for detailed output from the backend.

We get a single fit with divergent transitions and the ranks look mostly OK:

-
+
 plot_rank_hist(results_beta_first_10)

-
-plot_ecdf_diff(results_beta_first_10)
+
+plot_ecdf_diff(results_beta_first_10)

Let’s inspect the pairs plot for the offending fit:

-
-mcmc_pairs(results_beta_first_10$fits[[3]]$draws())
+
+if(use_cmdstanr) {
+  mcmc_pairs(results_beta_first_10$fits[[3]]$draws())
+} else {
+  mcmc_pairs(results_beta_first_10$fits[[3]])
+}

-

This is a very ugly plot, but we see some correlations between the corresponding beta elements (e.g. beta[1,1] and beta[2,1]), let’s have a closer look and show the same pairs plot for five of our fits:

-
+

This is a very crowded plot and it is hard to resolve details, but we see some correlations between the corresponding beta elements (e.g. beta[1,1] and beta[2,1]); let’s have a closer look and show the same pairs plot for five of our fits:

+
 for(i in 1:5) {
   fit <- results_beta_first_10$fits[[i]]
-  print(mcmc_pairs(fit$draws(), pars = c("beta[1,1]", "beta[2,1]","beta[1,2]", "beta[2,2]")))
+  if(use_cmdstanr) {
+    pairs_input <- fit$draws()
+  } else {
+    pairs_input <- fit
+  }
+  print(mcmc_pairs(pairs_input, pars = c("beta[1,1]", "beta[2,1]","beta[1,2]", "beta[2,2]")))
 }

-

Turns out the correlations are in all fits, although sometimes they are relatively weak and the sampler is able to handle the posterior, it is potentially troubling. The main issue is that we plan to integrate this model with other components and problems that can be tolerated in a single component might interact with other components and make the model intractable.

-

We can even understand the reason for the positive correlation - it is because mean of our response beta distribution is exp(linpred[1,]) / ( exp(linpred[1,]) + exp(linpred[2,])), so increasing both linear predictors at the same time results in the same mean (but different variance). Since mean is usually more constrained than variance of the response, we get this ridge.

-
-

-Parametrizing the beta distribution via mean

-

The simplest way to resolve the issue with the correlations is to explicitly parametrize the beta distribution by its mean (\(0 < \mu < 1\)). The more common parametrization than adds a precision parameter (\(\phi > 0\)), so we then have \(y \sim \mathrm{Beta}(\mu \phi, (1 - \mu) \phi)\)

-

This also makes much more sense for the bigger task - combining with the mixture component, as we really want to predict just a single probability. So we’ll rewrite our predictors to predict only the logit of the mean (as in logistic regression) and keep the precision as a constant between observations. We could definitely also decide whether to keep the full flexibility and allow predictors for precision, we just don’t do it here.

+

It turns out the correlations are present in all fits. Although they are sometimes relatively weak and the sampler is able to handle the posterior, they are potentially troubling. The main issue is that we plan to integrate this model with other submodels, and problems that can be tolerated in a single submodel might interact with other submodels and make the combined model intractable.

+

We can even understand the reason for the positive correlation - the vector of predicted means of our response beta distribution is exp(linpred[1,]) / ( exp(linpred[1,]) + exp(linpred[2,])). Increasing both linear predictors by the same amount therefore leaves all predicted means unchanged (but changes the predicted variances), and increasing the two corresponding beta elements has exactly this effect - the two linear predictors for any x increase by the same amount. Since the mean is more constrained by the data than the variance of the response, the individual predictor values for the first shape parameter are allowed to vary quite a bit as long as their counterparts for the second shape parameter move in step, keeping the predicted means the same while only shifting our uncertainty about the variance. And this is how we get the ridge.
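A small numeric sketch of this (arbitrary values): shifting both linear predictors by the same constant leaves the implied mean of the Beta response untouched while changing its variance.

beta_mean <- function(a, b) a / (a + b)
beta_var  <- function(a, b) a * b / ((a + b)^2 * (a + b + 1))
l1 <- 0.5; l2 <- -0.2; shift <- 1.5
c(beta_mean(exp(l1), exp(l2)), beta_var(exp(l1), exp(l2)))
c(beta_mean(exp(l1 + shift), exp(l2 + shift)), beta_var(exp(l1 + shift), exp(l2 + shift)))
# same mean, different variance - the likelihood therefore constrains the
# difference of the predictors much more than their sum, producing the ridge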

+
+

Parametrizing the beta distribution via mean +

+

The simplest way to resolve the issue with the correlations is to explicitly parametrize the beta distribution by its mean (\(0 < \mu < 1\)). The more common parametrization then adds a precision parameter (\(\phi > 0\)), so we have \(y \sim \mathrm{Beta}(\mu \phi, (1 - \mu) \phi)\).
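As a quick sanity check of this parametrization (a sketch with made-up values of \(\mu\) and \(\phi\), not part of the vignette code), we can simulate from \(\mathrm{Beta}(\mu \phi, (1 - \mu) \phi)\) and verify that the sample mean is close to \(\mu\) and the sample variance is close to \(\mu (1 - \mu) / (\phi + 1)\):

mu  <- 0.3
phi <- 20
draws <- rbeta(1e5, shape1 = mu * phi, shape2 = (1 - mu) * phi)
mean(draws)               # should be close to mu = 0.3
var(draws)                # should be close to mu * (1 - mu) / (phi + 1)
mu * (1 - mu) / (phi + 1) # = 0.01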

+

This also makes much more sense for the bigger task of combining with the mixture submodel, as we really want to predict just a single probability. So we’ll rewrite our predictors to predict only the logit of the mean (as in logistic regression) and keep the precision constant across observations. We could also decide to keep the full flexibility and allow predictors for the precision; we just don’t do that here.

This is then our updated model:

-
-cat(readLines("small_model_workflow/beta_precision.stan"), sep = "\n")
+
+cat(readLines("small_model_workflow/beta_precision.stan"), sep = "\n")
data {
   int<lower=0> N_obs;
   vector<lower=0, upper=1>[N_obs] y;
@@ -688,40 +739,46 @@ 

  target += beta_lpdf(y | mu * phi, (1 - mu) * phi);
  target += normal_lpdf(beta | 0, 1);
}

-
-model_beta_precision <- cmdstan_model("small_model_workflow/beta_precision.stan")
-backend_beta_precision <- SBC_backend_cmdstan_sample(model_beta_precision) 
+
+if(use_cmdstanr) {
+  model_beta_precision <- cmdstan_model("small_model_workflow/beta_precision.stan")
+  backend_beta_precision <- SBC_backend_cmdstan_sample(model_beta_precision) 
+} else {
+  model_beta_precision <- stan_model("small_model_workflow/beta_precision.stan")
+  backend_beta_precision <- SBC_backend_rstan_sample(model_beta_precision) 
+  
+}

And we need to update the generator to match:

-
+
 generator_func_beta_precision <- function(N_obs, N_predictors) {
   repeat {
-    beta <- rnorm(N_predictors, 0, 1)
-    phi <- rlnorm(1, 3, 1)
+    beta <- rnorm(N_predictors, 0, 1)
+    phi <- rlnorm(1, 3, 1)
   
-    x <- matrix(rnorm(N_predictors * N_obs, 0, 1), nrow = N_predictors, ncol = N_obs)
+    x <- matrix(rnorm(N_predictors * N_obs, 0, 1), nrow = N_predictors, ncol = N_obs)
     x[1, ] <- 1 # Intercept
   
-    y <- array(NA_real_, N_obs)
+    y <- array(NA_real_, N_obs)
       
     for(n in 1:N_obs) {
       linpred <- 0
       for(p in 1:N_predictors) {
         linpred <- linpred + x[p, n] * beta[p]
       }
-      mu <- plogis(linpred)
-      y[n] <- rbeta(1, mu * phi, (1 - mu) * phi)
+      mu <- plogis(linpred)
+      y[n] <- rbeta(1, mu * phi, (1 - mu) * phi)
     }
-    if(all(y > 1e-7) && all(y < 1 - 1e-7)) {
+    if(all(y < 1 - 1e-12)) {
       break;
     }
   }
     
-  list(
-    parameters = list(
+  list(
+    variables = list(
       beta = beta,
       phi = phi
     ),
-    generated = list(
+    generated = list(
       N_obs = N_obs,
       N_predictors = N_predictors,
       y = y,
@@ -732,40 +789,40 @@ 

generator_beta_precision <- SBC_generator_function(generator_func_beta_precision, N_obs = 50, N_predictors = 3)

-

Starting with 10 datasets:

-
-set.seed(46988234)
+

Starting with 10 simulations:

+
+set.seed(46988234)
 datasets_beta_precision_10 <- generate_datasets(generator_beta_precision, 10)
-
-results_beta_precision_10 <- compute_results(datasets_beta_precision_10, backend_beta_precision, 
+
+results_beta_precision_10 <- compute_SBC(datasets_beta_precision_10, backend_beta_precision, 
                     cache_mode = "results", 
-                    cache_location = file.path(cache_dir, "beta_precision_10"))
-
## Results loaded from cache file 'beta_precision_10'
-
##  - 10 (100%) fits had some steps rejected. Maximum number of rejections was 15.
-
## Not all diagnostics are OK.
-## You can learn more by inspecting $default_diagnostics, $backend_diagnostics 
-## and/or investigating $outputs/$messages/$warnings for detailed output from the backend.
+ cache_location = file.path(cache_dir, "beta_precision_10"))
+
## Results loaded from cache file 'beta_precision_10'
+
##  - 10 (100%) fits had some steps rejected. Maximum number of rejections was 15.
+
## Not all diagnostics are OK.
+## You can learn more by inspecting $default_diagnostics, $backend_diagnostics 
+## and/or investigating $outputs/$messages/$warnings for detailed output from the backend.

No big problems from the fit and the plots:

-
+
 plot_rank_hist(results_beta_precision_10)

-
-plot_ecdf_diff(results_beta_precision_10)
+
+plot_ecdf_diff(results_beta_precision_10)

So we’ll run 90 more iterations and combine them with the previous results:

-
-set.seed(2136468)
+
+set.seed(2136468)
 datasets_beta_precision_90 <- generate_datasets(generator_beta_precision, 90)
-results_beta_precision_90 <- compute_results(
+results_beta_precision_90 <- compute_SBC(
   datasets_beta_precision_90, backend_beta_precision,
   keep_fits = FALSE, cache_mode = "results", 
-  cache_location = file.path(cache_dir, "beta_precision_90"))
-
## Results loaded from cache file 'beta_precision_90'
-
##  - 90 (100%) fits had some steps rejected. Maximum number of rejections was 18.
-
## Not all diagnostics are OK.
-## You can learn more by inspecting $default_diagnostics, $backend_diagnostics 
-## and/or investigating $outputs/$messages/$warnings for detailed output from the backend.
-
+  cache_location = file.path(cache_dir, "beta_precision_90"))
+
## Results loaded from cache file 'beta_precision_90'
+
##  - 90 (100%) fits had some steps rejected. Maximum number of rejections was 17.
+
## Not all diagnostics are OK.
+## You can learn more by inspecting $default_diagnostics, $backend_diagnostics 
+## and/or investigating $outputs/$messages/$warnings for detailed output from the backend.
+
 results_beta_precision_100 <-
   bind_results(
     results_beta_precision_10,
@@ -773,21 +830,21 @@ 

  )
datasets_beta_precision_100 <- bind_datasets(datasets_beta_precision_10, datasets_beta_precision_90)

-
+
 plot_rank_hist(results_beta_precision_100)

-
-plot_ecdf_diff(results_beta_precision_100)
+
+plot_ecdf_diff(results_beta_precision_100)

-

The plots don’t look terrible, but the beta[2] and especially the phi parameter show slight problems.

+

The plots don’t look terrible, but beta[2] and especially the phi variable show slight problems.

So we look back at our model code and note that we forgot to put any prior on phi! Mismatches in priors between the model and the simulator are unfortunately often not very visible in SBC and can require a lot of simulations to discover (see the limits_of_SBC vignette for a more detailed discussion).

-
-

-Adding missing prior

+
+

Adding missing prior +

So we add the missing prior to the model:

-
-cat(readLines("small_model_workflow/beta_precision_fixed_prior.stan"), sep = "\n")
+
+cat(readLines("small_model_workflow/beta_precision_fixed_prior.stan"), sep = "\n")
data {
   int<lower=0> N_obs;
   vector<lower=0, upper=1>[N_obs] y;
@@ -808,61 +865,67 @@ 

  target += normal_lpdf(beta | 0, 1);
  target += lognormal_lpdf(phi | 3, 1);
}

-
-model_beta_precision_fixed_prior <-
-  cmdstan_model("small_model_workflow/beta_precision_fixed_prior.stan")
-backend_beta_precision_fixed_prior <- SBC_backend_cmdstan_sample(model_beta_precision_fixed_prior) 
-

And recompute for all 100 datasets at once (as we don’t expect adding prior to introduce huge problems).

-
+
+if(use_cmdstanr) {
+  model_beta_precision_fixed_prior <-
+    cmdstan_model("small_model_workflow/beta_precision_fixed_prior.stan")
+  backend_beta_precision_fixed_prior <- SBC_backend_cmdstan_sample(model_beta_precision_fixed_prior) 
+} else {
+  model_beta_precision_fixed_prior <-
+    stan_model("small_model_workflow/beta_precision_fixed_prior.stan")
+  backend_beta_precision_fixed_prior <- SBC_backend_rstan_sample(model_beta_precision_fixed_prior)   
+}
+

And recompute for all 100 simulations at once (as we don’t expect adding the prior to introduce huge problems).

+
 results_beta_precision_fixed_prior <- 
-  compute_results(datasets_beta_precision_100, backend_beta_precision_fixed_prior, 
+  compute_SBC(datasets_beta_precision_100, backend_beta_precision_fixed_prior, 
                     keep_fits = FALSE, cache_mode = "results", 
-                    cache_location = file.path(cache_dir, "beta_precision_fixed_prior"))
-
## Results loaded from cache file 'beta_precision_fixed_prior'
-
##  - 100 (100%) fits had some steps rejected. Maximum number of rejections was 20.
-
## Not all diagnostics are OK.
-## You can learn more by inspecting $default_diagnostics, $backend_diagnostics 
-## and/or investigating $outputs/$messages/$warnings for detailed output from the backend.
-
+                    cache_location = file.path(cache_dir, "beta_precision_fixed_prior"))
+
## Results loaded from cache file 'beta_precision_fixed_prior'
+
##  - 100 (100%) fits had some steps rejected. Maximum number of rejections was 19.
+
## Not all diagnostics are OK.
+## You can learn more by inspecting $default_diagnostics, $backend_diagnostics 
+## and/or investigating $outputs/$messages/$warnings for detailed output from the backend.
+
 plot_rank_hist(results_beta_precision_fixed_prior)

-
-plot_ecdf_diff(results_beta_precision_fixed_prior)
+
+plot_ecdf_diff(results_beta_precision_fixed_prior)

-

Diagnostic plots are looking good! So we add 100 more datasets:

-
-set.seed(1233845)
+

Diagnostic plots are looking good! So we add 100 more simulations:

+
+set.seed(1233845)
 datasets_beta_precision_100b <- generate_datasets(generator_beta_precision, 100)
 results_beta_precision_fixed_prior_200 <-
   bind_results(
     results_beta_precision_fixed_prior,
-    compute_results(datasets_beta_precision_100b, backend_beta_precision_fixed_prior, 
+    compute_SBC(datasets_beta_precision_100b, backend_beta_precision_fixed_prior, 
                     keep_fits = FALSE, cache_mode = "results", 
-                    cache_location = file.path(cache_dir, "beta_precision_fixed_prior_2")))
-
## Results loaded from cache file 'beta_precision_fixed_prior_2'
-
##  - 100 (100%) fits had some steps rejected. Maximum number of rejections was 18.
-
## Not all diagnostics are OK.
-## You can learn more by inspecting $default_diagnostics, $backend_diagnostics 
-## and/or investigating $outputs/$messages/$warnings for detailed output from the backend.
-
+                    cache_location = file.path(cache_dir, "beta_precision_fixed_prior_2")))
+
## Results loaded from cache file 'beta_precision_fixed_prior_2'
+
##  - 100 (100%) fits had some steps rejected. Maximum number of rejections was 18.
+
## Not all diagnostics are OK.
+## You can learn more by inspecting $default_diagnostics, $backend_diagnostics 
+## and/or investigating $outputs/$messages/$warnings for detailed output from the backend.
+
 plot_rank_hist(results_beta_precision_fixed_prior_200)

-
-plot_ecdf_diff(results_beta_precision_fixed_prior_200)
+
+plot_ecdf_diff(results_beta_precision_fixed_prior_200)

Yeah, still looking good. And we can see that the empirical coverage of our central intervals is in quite tight agreement with theory:

-
+
 plot_coverage(results_beta_precision_fixed_prior_200)

-

So for now we are also happy about the beta regression component.

+

So for now we are also happy about the beta regression submodel.

-
-

-Putting it together

+
+

Putting it together +

We are finally ready to make a first attempt at the full model:

-
-cat(readLines("small_model_workflow/combined_first.stan"), sep = "\n")
+
+cat(readLines("small_model_workflow/combined_first.stan"), sep = "\n")
data {
   int<lower=0> N_obs;
   int y[N_obs];
@@ -889,46 +952,51 @@ 

  target += normal_lpdf(mu | 3, 1);
  target += normal_lpdf(beta | 0, 1);
}

-
-model_combined <- cmdstan_model("small_model_workflow/combined_first.stan")
-backend_combined <- SBC_backend_cmdstan_sample(model_combined)
+
+if(use_cmdstanr) {
+  model_combined <- cmdstan_model("small_model_workflow/combined_first.stan")
+  backend_combined <- SBC_backend_cmdstan_sample(model_combined)
+} else {
+  model_combined <- stan_model("small_model_workflow/combined_first.stan")
+  backend_combined <- SBC_backend_rstan_sample(model_combined)  
+}

And this is our generator for the full model:

-
+
 generator_func_combined <- function(N_obs, N_predictors) {
   # If the priors for all components of an ordered vector are the same
   # then just sorting the result of a generator is enough to create
-  # a valid sample from the ordered vector
-  mu <- sort(rnorm(2, 3, 1)) 
+  # a valid draw from the ordered vector prior
+  mu <- sort(rnorm(2, 3, 1)) 
   
-  beta <- rnorm(N_predictors, 0, 1)
+  beta <- rnorm(N_predictors, 0, 1)
 
-  x <- matrix(rnorm(N_predictors * N_obs, 0, 1), nrow = N_predictors, ncol = N_obs)
+  x <- matrix(rnorm(N_predictors * N_obs, 0, 1), nrow = N_predictors, ncol = N_obs)
   x[1, ] <- 1 # Intercept
 
-  y <- array(NA_real_, N_obs)
+  y <- array(NA_real_, N_obs)
 
   for(n in 1:N_obs) {
     linpred <- 0
     for(p in 1:N_predictors) {
       linpred <- linpred + x[p, n] * beta[p]
     }
-    theta <- plogis(linpred)
+    theta <- plogis(linpred)
     
-    if(runif(1) < theta) {
-      y[n] <- rpois(1, exp(mu[1]))
+    if(runif(1) < theta) {
+      y[n] <- rpois(1, exp(mu[1]))
     } else {
-      y[n] <- rpois(1, exp(mu[2]))
+      y[n] <- rpois(1, exp(mu[2]))
     }
     
   }
 
 
-  list(
-    parameters = list(
+  list(
+    variables = list(
       beta = beta,
       mu = mu
     ),
-    generated = list(
+    generated = list(
       N_obs = N_obs,
       N_predictors = N_predictors,
       y = y,
@@ -939,95 +1007,95 @@ 

generator_combined <- SBC_generator_function(generator_func_combined, N_obs = 50, N_predictors = 3)

We are confident (and the fits are fast anyway), so we start with 200 simulations:

-
-set.seed(5749955)
+
+set.seed(5749955)
 dataset_combined <- generate_datasets(generator_combined, 200)
-
-results_combined <- compute_results(dataset_combined, backend_combined, 
+
+results_combined <- compute_SBC(dataset_combined, backend_combined, 
                     keep_fits = FALSE, cache_mode = "results", 
-                    cache_location = file.path(cache_dir, "combined"))
-
## Results loaded from cache file 'combined'
-
##  - 7 (4%) fits had at least one Rhat > 1.01. Largest Rhat was 1.05.
-
##  - 8 (4%) fits had tail ESS undefined or less than half of the maximum rank, potentially skewing 
-## the rank statistics. The lowest tail ESS was NA.
-##  If the fits look good otherwise, increasing `thin_ranks` (via recompute_statistics) 
-## or number of posterior samples (by refitting) might help.
-
##  - 20 (10%) fits had divergent transitions. Maximum number of divergences was 66.
-
##  - 2 (1%) fits had some steps rejected. Maximum number of rejections was 2.
-
## Not all diagnostics are OK.
-## You can learn more by inspecting $default_diagnostics, $backend_diagnostics 
-## and/or investigating $outputs/$messages/$warnings for detailed output from the backend.
+ cache_location = file.path(cache_dir, "combined"))
+
## Results loaded from cache file 'combined'
+
##  - 7 (4%) fits had at least one Rhat > 1.01. Largest Rhat was 1.05.
+
##  - 6 (3%) fits had tail ESS undefined or less than half of the maximum rank, potentially skewing 
+## the rank statistics. The lowest tail ESS was 17.
+##  If the fits look good otherwise, increasing `thin_ranks` (via recompute_SBC_statistics) 
+## or number of posterior draws (by refitting) might help.
+
##  - 21 (10%) fits had divergent transitions. Maximum number of divergences was 66.
+
##  - 2 (1%) fits had some steps rejected. Maximum number of rejections was 2.
+
## Not all diagnostics are OK.
+## You can learn more by inspecting $default_diagnostics, $backend_diagnostics 
+## and/or investigating $outputs/$messages/$warnings for detailed output from the backend.

We get some divergent transitions, but the ranks look pretty good:

-
+
 plot_rank_hist(results_combined)

-
-plot_ecdf_diff(results_combined)
+
+plot_ecdf_diff(results_combined)

Indeed it seems the model works pretty well.

-
-

-Adding rejection sampling

+
+

Adding rejection sampling +

As done previously, we could just exclude the fits that had divergences, but to complete our tour of possibilities, we’ll show one more option for dealing with this type of problem.

The general idea is that although we might not want to (or be able to) express our prior belief about the model (here, that the two mixture components are distinct) through priors on model parameters, we may still be able to express our prior belief about the data itself.

-

And it turns out that if we remove datasets that don’t meet a certain condition imposed on the observed data, the implied prior on parameters becomes an additive constant and we can use exactly the same model to fit only the non-rejected datasets. Note that this does not hold if we rejected datasets based on their parameter values - for more details see the rejection_sampling vignette.

-

The main advantage is that if we can do this, we can avoid wasting computation on fitting datasets that would likely produce divergences anyway. The downside is that it means we no longer have a guarantee the model works for non-rejected datasets, so we need to check if the data we want to analyze would not be rejected by our criterion.

+

And it turns out that if we remove simulations that don’t meet a certain condition imposed on the observed data, the implied change to the log density is just an additive constant, so we can use exactly the same model to fit only the non-rejected simulations. Note that this does not hold if we reject simulations based on some unobserved variables - for more details see the rejection_sampling vignette.
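A short sketch of why this works (our own notation, not taken from the vignette): let \(c(y) \in \{0, 1\}\) be the acceptance indicator, which depends only on the simulated data \(y\). Conditioning on acceptance multiplies the joint density of \((\theta, y)\) by \(c(y)\), so for any accepted dataset the posterior is unchanged:

\[
p(\theta \mid y, \text{accepted}) = \frac{p(y \mid \theta)\, p(\theta)\, c(y)}{\int p(y \mid \theta')\, p(\theta')\, c(y)\, d\theta'} = \frac{p(y \mid \theta)\, p(\theta)}{\int p(y \mid \theta')\, p(\theta')\, d\theta'} = p(\theta \mid y).
\]

On the log scale the rejection therefore only contributes an additive constant, which is why the very same Stan model remains valid for the non-rejected simulations. If we instead rejected based on \(\theta\), the extra factor would depend on \(\theta\) and the posterior would change.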

+

The main advantage is that if we can do this, we avoid wasting computation on fitting data that would likely produce divergences anyway. The downside is that we then only have a guarantee that the model works for non-rejected data, so we need to check that any data we actually want to analyze would not be rejected by our criterion.

How to build such a criterion here? We’ll note that for Poisson-distributed variables the ratio of variance to mean (a.k.a. the Fano factor) is always 1. So if the components are too similar, the data should resemble a Poisson distribution and have a Fano factor close to 1, while if the components are distinct the Fano factor will be larger.
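A quick illustration (a sketch with made-up rates, not part of the vignette code): for a plain Poisson sample the Fano factor hovers around 1, while for a mixture of two well-separated Poisson components it is clearly larger:

set.seed(123)
# Plain Poisson: Fano factor (variance / mean) close to 1
y_pois <- rpois(1e4, lambda = 20)
var(y_pois) / mean(y_pois)

# Mixture of two clearly separated Poisson components: Fano factor well above 1
lambda_mix <- ifelse(runif(1e4) < 0.5, 5, 40)
y_mix <- rpois(1e4, lambda_mix)
var(y_mix) / mean(y_mix)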

Below is a plot of Fano factors versus the number of divergences we’ve seen:

-
-fanos <- vapply(dataset_combined$generated, 
-                function(dataset) { var(dataset$y) / mean(dataset$y) }, 
+
+fanos <- vapply(dataset_combined$generated, 
+                function(dataset) { var(dataset$y) / mean(dataset$y) }, 
                 FUN.VALUE = 0)
-plot(fanos, results_combined$backend_diagnostics$n_divergent)
+plot(fanos, results_combined$backend_diagnostics$n_divergent)

All the divergences are for low Fano factors - this is the histogram of Fano factors for diverging fits:

-
-hist(fanos[results_combined$backend_diagnostics$n_divergent > 0])
+
+hist(fanos[results_combined$backend_diagnostics$n_divergent > 0])

-

So what we’ll do is that we’ll reject any dataset with Fano factor < 1.5. In practice a simple way to implement this is to wrap our generator code in a loop and break from the loop only when the generated dataset meets our criteria (i.e. is not rejected). This is our code:

-
+

So what we’ll do is reject any simulation where the observed data have a Fano factor < 1.5. In practice a simple way to implement this is to wrap our generator code in a loop and break from the loop only when the generated data meet our criterion (i.e. are not rejected). This is our code:

+
 generator_func_combined_reject <- function(N_obs, N_predictors) {
   if(N_obs < 5) {
-    stop("Too low N_obs for this simulator")
+    stop("Too low N_obs for this simulator")
   }
   repeat {
     # If the priors for all components of an ordered vector are the same
     # then just sorting the result of a generator is enough to create
-    # a valid sample from the ordered vector
-    mu <- sort(rnorm(2, 3, 1)) 
+    # a valid draw from the ordered vector prior
+    mu <- sort(rnorm(2, 3, 1)) 
     
-    beta <- rnorm(N_predictors, 0, 1)
+    beta <- rnorm(N_predictors, 0, 1)
 
-    x <- matrix(rnorm(N_predictors * N_obs, 0, 1), nrow = N_predictors, ncol = N_obs)
+    x <- matrix(rnorm(N_predictors * N_obs, 0, 1), nrow = N_predictors, ncol = N_obs)
     x[1, ] <- 1 # Intercept
   
-    y <- array(NA_real_, N_obs)
+    y <- array(NA_real_, N_obs)
 
     for(n in 1:N_obs) {
       linpred <- 0
       for(p in 1:N_predictors) {
         linpred <- linpred + x[p, n] * beta[p]
       }
-      theta <- plogis(linpred)
+      theta <- plogis(linpred)
       
-      if(runif(1) < theta) {
-        y[n] <- rpois(1, exp(mu[1]))
+      if(runif(1) < theta) {
+        y[n] <- rpois(1, exp(mu[1]))
       } else {
-        y[n] <- rpois(1, exp(mu[2]))
+        y[n] <- rpois(1, exp(mu[2]))
       }
       
     }
-    if(var(y) / mean(y) > 1.5) {
+    if(var(y) / mean(y) > 1.5) {
       break;
     }
   }
     
-  list(
-    parameters = list(
+  list(
+    variables = list(
       beta = beta,
       mu = mu
     ),
-    generated = list(
+    generated = list(
       N_obs = N_obs,
       N_predictors = N_predictors,
       y = y,
@@ -1038,114 +1106,114 @@ 

generator_combined_reject <- SBC_generator_function(generator_func_combined_reject, N_obs = 50, N_predictors = 3)

-

We’ll once again fit our model to 200 datasets:

-
-set.seed(44685226)
+

We’ll once again fit our model to 200 simulations:

+
+set.seed(44685226)
 dataset_combined_reject <- generate_datasets(generator_combined_reject, 200)
-
-results_combined_reject <- compute_results(dataset_combined_reject, backend_combined, 
+
+results_combined_reject <- compute_SBC(dataset_combined_reject, backend_combined, 
                     keep_fits = FALSE, cache_mode = "results", 
-                    cache_location = file.path(cache_dir, "combined_reject"))
-
## Results loaded from cache file 'combined_reject'
-
##  - 1 (0%) fits had some steps rejected. Maximum number of rejections was 2.
-
## Not all diagnostics are OK.
-## You can learn more by inspecting $default_diagnostics, $backend_diagnostics 
-## and/or investigating $outputs/$messages/$warnings for detailed output from the backend.
+ cache_location = file.path(cache_dir, "combined_reject"))
+
## Results loaded from cache file 'combined_reject'
+
##  - 1 (0%) fits had some steps rejected. Maximum number of rejections was 2.
+
## Not all diagnostics are OK.
+## You can learn more by inspecting $default_diagnostics, $backend_diagnostics 
+## and/or investigating $outputs/$messages/$warnings for detailed output from the backend.

No more divergences! And the ranks look nice.

-
+
 plot_rank_hist(results_combined_reject)

-
-plot_ecdf_diff(results_combined_reject)
+
+plot_ecdf_diff(results_combined_reject)

And our coverage is pretty tight:

-
+
 plot_coverage(results_combined_reject)

-

Below we show the uncertainty for two parameters and some widths of central posterior intervals numerically:

-
+

Below we show, numerically, the coverage and its uncertainty for two variables and several central posterior interval widths:

+
 stats_subset <- results_combined_reject$stats[
-  results_combined_reject$stats$parameter %in% c("beta[1]", "mu[1]"),]
-
-empirical_coverage(stats_subset, c(0.25,0.5,0.9,0.95))
-
## # A tibble: 8 x 6
-##   parameter width width_represented ci_low estimate ci_high
-##   <chr>     <dbl>             <dbl>  <dbl>    <dbl>   <dbl>
-## 1 beta[1]    0.25             0.249  0.168    0.22    0.283
-## 2 beta[1]    0.5              0.499  0.402    0.47    0.539
-## 3 beta[1]    0.9              0.900  0.828    0.88    0.918
-## 4 beta[1]    0.95             0.950  0.886    0.93    0.958
-## 5 mu[1]      0.25             0.249  0.182    0.235   0.299
-## 6 mu[1]      0.5              0.499  0.387    0.455   0.524
-## 7 mu[1]      0.9              0.900  0.851    0.9     0.934
-## 8 mu[1]      0.95             0.950  0.910    0.95    0.972
+  results_combined_reject$stats$variable %in% c("beta[1]", "mu[1]"),]
+
+empirical_coverage(stats_subset, c(0.25,0.5,0.9,0.95))
+
## # A tibble: 8 x 6
+##   variable width width_represented ci_low estimate ci_high
+##   <chr>    <dbl>             <dbl>  <dbl>    <dbl>   <dbl>
+## 1 beta[1]   0.25              0.25  0.173    0.225   0.288
+## 2 beta[1]   0.5               0.5   0.402    0.47    0.539
+## 3 beta[1]   0.9               0.9   0.822    0.875   0.914
+## 4 beta[1]   0.95              0.95  0.880    0.925   0.954
+## 5 mu[1]     0.25              0.25  0.186    0.24    0.304
+## 6 mu[1]     0.5               0.5   0.387    0.455   0.524
+## 7 mu[1]     0.9               0.9   0.851    0.9     0.934
+## 8 mu[1]     0.95              0.95  0.910    0.95    0.972

Maybe we think the remaining uncertainty is too big, so we’ll run 300 more simulations, just to be sure:

-
-set.seed(1395367854)
+
+set.seed(1395367854)
 dataset_combined_reject_more <- generate_datasets(generator_combined_reject, 300) 
 results_combined_reject_more <- bind_results(
   results_combined_reject,
-  compute_results(dataset_combined_reject_more, backend_combined, 
+  compute_SBC(dataset_combined_reject_more, backend_combined, 
                     keep_fits = FALSE, cache_mode = "results", 
-                    cache_location = file.path(cache_dir, "combined_reject_more"))
+                    cache_location = file.path(cache_dir, "combined_reject_more"))
 )
-
## Results loaded from cache file 'combined_reject_more'
-
##  - 2 (1%) fits had at least one Rhat > 1.01. Largest Rhat was 1.093.
-
##  - 2 (1%) fits had tail ESS undefined or less than half of the maximum rank, potentially skewing 
-## the rank statistics. The lowest tail ESS was NA.
-##  If the fits look good otherwise, increasing `thin_ranks` (via recompute_statistics) 
-## or number of posterior samples (by refitting) might help.
-
##  - 2 (1%) fits had divergent transitions. Maximum number of divergences was 119.
-
##  - 3 (1%) fits had some steps rejected. Maximum number of rejections was 2.
-
## Not all diagnostics are OK.
-## You can learn more by inspecting $default_diagnostics, $backend_diagnostics 
-## and/or investigating $outputs/$messages/$warnings for detailed output from the backend.
+
## Results loaded from cache file 'combined_reject_more'
+
##  - 2 (1%) fits had at least one Rhat > 1.01. Largest Rhat was 1.093.
+
##  - 2 (1%) fits had tail ESS undefined or less than half of the maximum rank, potentially skewing 
+## the rank statistics. The lowest tail ESS was 21.
+##  If the fits look good otherwise, increasing `thin_ranks` (via recompute_SBC_statistics) 
+## or number of posterior draws (by refitting) might help.
+
##  - 2 (1%) fits had divergent transitions. Maximum number of divergences was 119.
+
##  - 3 (1%) fits had some steps rejected. Maximum number of rejections was 2.
+
## Not all diagnostics are OK.
+## You can learn more by inspecting $default_diagnostics, $backend_diagnostics 
+## and/or investigating $outputs/$messages/$warnings for detailed output from the backend.

We get a very small number of problematic fits, which we can ignore at this volume (but more aggressive rejection sampling would probably remove those as well).
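If those remaining problematic fits bothered us, one option (a hypothetical sketch, not something we ran; the function names with the `_strict` suffix and the threshold of 2 are made up) would be a stricter rejection threshold. Since the rejection only looks at the simulated data, we can simply wrap the original generator and resample until the Fano factor clears a higher bar:

generator_func_combined_reject_strict <- function(N_obs, N_predictors, fano_threshold = 2) {
  repeat {
    # reuse the unrejected generator defined earlier and only add the stricter filter
    sim <- generator_func_combined(N_obs, N_predictors)
    if(var(sim$generated$y) / mean(sim$generated$y) > fano_threshold) {
      return(sim)
    }
  }
}

generator_combined_strict <- SBC_generator_function(
  generator_func_combined_reject_strict, N_obs = 50, N_predictors = 3)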

Our plots and coverage are now pretty decent:

-
+
 plot_rank_hist(results_combined_reject_more)

-
-plot_ecdf_diff(results_combined_reject_more)
+
+plot_ecdf_diff(results_combined_reject_more)

-
+
 plot_coverage(results_combined_reject_more)

-
+
 stats_subset <- results_combined_reject_more$stats[
-  results_combined_reject_more$stats$parameter %in% c("beta[1]", "mu[2]"),]
-empirical_coverage(stats_subset, c(0.25,0.5,0.9,0.95))
-
## # A tibble: 8 x 6
-##   parameter width width_represented ci_low estimate ci_high
-##   <chr>     <dbl>             <dbl>  <dbl>    <dbl>   <dbl>
-## 1 beta[1]    0.25             0.249  0.188    0.222   0.261
-## 2 beta[1]    0.5              0.499  0.431    0.474   0.518
-## 3 beta[1]    0.9              0.900  0.866    0.896   0.920
-## 4 beta[1]    0.95             0.950  0.911    0.936   0.954
-## 5 mu[2]      0.25             0.249  0.242    0.28    0.321
-## 6 mu[2]      0.5              0.499  0.536    0.58    0.622
-## 7 mu[2]      0.9              0.900  0.886    0.914   0.935
-## 8 mu[2]      0.95             0.950  0.930    0.952   0.967
+  results_combined_reject_more$stats$variable %in% c("beta[1]", "mu[2]"),]
+empirical_coverage(stats_subset, c(0.25,0.5,0.9,0.95))
+
## # A tibble: 8 x 6
+##   variable width width_represented ci_low estimate ci_high
+##   <chr>    <dbl>             <dbl>  <dbl>    <dbl>   <dbl>
+## 1 beta[1]   0.25              0.25  0.186    0.22    0.258
+## 2 beta[1]   0.5               0.5   0.433    0.476   0.520
+## 3 beta[1]   0.9               0.9   0.862    0.892   0.916
+## 4 beta[1]   0.95              0.95  0.909    0.934   0.953
+## 5 mu[2]     0.25              0.25  0.244    0.282   0.323
+## 6 mu[2]     0.5               0.5   0.540    0.584   0.626
+## 7 mu[2]     0.9               0.9   0.891    0.918   0.939
+## 8 mu[2]     0.95              0.95  0.930    0.952   0.967

This actually shows a limitation of the coverage results - for mu[2] the approximate CI for coverage excludes exact calibration for a bunch of intervals, but above we see that the more trustworthy plot_ecdf_diff is not showing a problem (although there is some tendency towards slight underdispersion).

Still, this might warrant further investigation if small discrepancies in mu are considered important. If we are interested only in the beta coefficients, we can rest assured that their calibration is pretty good. We give you our word that we ran additional simulations and the discrepancy disappears.

-

Finally, we can also use this simulation exercise to understand what would we be likely to learn from an experiment matching the simulations (50 observations, 3 predictors) and plot the true values against estimated mean + 90% posterior credible interval:

-
+

Finally, we can also use this simulation exercise to understand what we would be likely to learn from an experiment matching the simulations (50 observations, 3 predictors) and plot the true values (simulated by the generator) against the estimated means with 90% posterior credible intervals:

+
 plot_sim_estimated(results_combined_reject_more, alpha = 0.2)

We see that we get very precise information about mu and a decent picture of all the beta elements, but the remaining uncertainty is large. We could for example compute the probability that the posterior 90% interval for beta[1] excludes zero, i.e. that we learn something about the sign of beta[1]:

-
+
 stats_beta1 <- 
   results_combined_reject_more$stats[
-    results_combined_reject_more$stats$parameter == "beta[1]",]
+    results_combined_reject_more$stats$variable == "beta[1]",]
 
-mean(sign(stats_beta1$q5) == sign(stats_beta1$q95))
-
## [1] 0.498
+mean(sign(stats_beta1$q5) == sign(stats_beta1$q95))
+
## [1] 0.496

Turns out the probability is only around 50%. Depending on your aims, this might be a reason to plan for a larger sample size!
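To plan such a larger experiment, a natural next step (sketched below with made-up numbers; we did not run this and the `_large` object names are hypothetical) would be to rerun the same SBC pipeline with a bigger N_obs and recompute the same sign probability:

# Hypothetical: same rejecting generator and backend, just more observations per dataset
generator_combined_large <- SBC_generator_function(
  generator_func_combined_reject, N_obs = 200, N_predictors = 3)

set.seed(22045789)  # arbitrary seed
datasets_combined_large <- generate_datasets(generator_combined_large, 200)
results_combined_large <- compute_SBC(datasets_combined_large, backend_combined,
                                      keep_fits = FALSE)

# Probability that the 90% central interval for beta[1] excludes zero
stats_beta1_large <- results_combined_large$stats[
  results_combined_large$stats$variable == "beta[1]", ]
mean(sign(stats_beta1_large$q5) == sign(stats_beta1_large$q95))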

-
-

-Take home message

+
+

Take home message +

There are a couple of lessons I hope this exercise showed: First, building models you can trust is hard work and it is very easy to make mistakes. Despite the models presented here being relatively simple, diagnosing the problems in them was not straightforward and required non-trivial background knowledge. For this reason, moving in small steps during model development is crucial and can save you time, as diagnosing the same problems in a 300-line Stan model with 50 parameters can be basically impossible.

We also hope we convinced you that the SBC package lets you get high-quality information from your simulation efforts and not only diagnose problems but also get some sort of assurance in the end that your model is at least pretty close to your simulator.

And that’s it for this vignette. Thanks for staying until the end, and we hope the workflow ideas will be useful for you!

@@ -1163,11 +1231,13 @@

-

Site built with pkgdown 1.6.1.

+

+

Site built with pkgdown 2.0.1.

@@ -1176,5 +1246,7 @@

+ + diff --git a/docs/articles/small_model_workflow_files/figure-html/beta_first_10_pairs-1.png b/docs/articles/small_model_workflow_files/figure-html/beta_first_10_pairs-1.png index a27210c..2ddfd48 100644 Binary files a/docs/articles/small_model_workflow_files/figure-html/beta_first_10_pairs-1.png and b/docs/articles/small_model_workflow_files/figure-html/beta_first_10_pairs-1.png differ diff --git a/docs/articles/small_model_workflow_files/figure-html/beta_first_10_pairs_subset-1.png b/docs/articles/small_model_workflow_files/figure-html/beta_first_10_pairs_subset-1.png index 1a510c8..3462451 100644 Binary files a/docs/articles/small_model_workflow_files/figure-html/beta_first_10_pairs_subset-1.png and b/docs/articles/small_model_workflow_files/figure-html/beta_first_10_pairs_subset-1.png differ diff --git a/docs/articles/small_model_workflow_files/figure-html/beta_first_10_pairs_subset-2.png b/docs/articles/small_model_workflow_files/figure-html/beta_first_10_pairs_subset-2.png index 0071e76..7464b61 100644 Binary files a/docs/articles/small_model_workflow_files/figure-html/beta_first_10_pairs_subset-2.png and b/docs/articles/small_model_workflow_files/figure-html/beta_first_10_pairs_subset-2.png differ diff --git a/docs/articles/small_model_workflow_files/figure-html/beta_first_10_pairs_subset-3.png b/docs/articles/small_model_workflow_files/figure-html/beta_first_10_pairs_subset-3.png index 8573696..1c64408 100644 Binary files a/docs/articles/small_model_workflow_files/figure-html/beta_first_10_pairs_subset-3.png and b/docs/articles/small_model_workflow_files/figure-html/beta_first_10_pairs_subset-3.png differ diff --git a/docs/articles/small_model_workflow_files/figure-html/beta_first_10_pairs_subset-4.png b/docs/articles/small_model_workflow_files/figure-html/beta_first_10_pairs_subset-4.png index e9232e8..5837b8e 100644 Binary files a/docs/articles/small_model_workflow_files/figure-html/beta_first_10_pairs_subset-4.png and b/docs/articles/small_model_workflow_files/figure-html/beta_first_10_pairs_subset-4.png differ diff --git a/docs/articles/small_model_workflow_files/figure-html/beta_first_10_pairs_subset-5.png b/docs/articles/small_model_workflow_files/figure-html/beta_first_10_pairs_subset-5.png index d9d6892..1ba0a62 100644 Binary files a/docs/articles/small_model_workflow_files/figure-html/beta_first_10_pairs_subset-5.png and b/docs/articles/small_model_workflow_files/figure-html/beta_first_10_pairs_subset-5.png differ diff --git a/docs/articles/small_model_workflow_files/figure-html/beta_first_results-1.png b/docs/articles/small_model_workflow_files/figure-html/beta_first_results-1.png index e66d78f..9ac23b2 100644 Binary files a/docs/articles/small_model_workflow_files/figure-html/beta_first_results-1.png and b/docs/articles/small_model_workflow_files/figure-html/beta_first_results-1.png differ diff --git a/docs/articles/small_model_workflow_files/figure-html/beta_first_results-2.png b/docs/articles/small_model_workflow_files/figure-html/beta_first_results-2.png index 1884233..2a6c949 100644 Binary files a/docs/articles/small_model_workflow_files/figure-html/beta_first_results-2.png and b/docs/articles/small_model_workflow_files/figure-html/beta_first_results-2.png differ diff --git a/docs/articles/small_model_workflow_files/figure-html/combined_fanos-1.png b/docs/articles/small_model_workflow_files/figure-html/combined_fanos-1.png index b58dc4f..f81ae5f 100644 Binary files a/docs/articles/small_model_workflow_files/figure-html/combined_fanos-1.png and 
b/docs/articles/small_model_workflow_files/figure-html/combined_fanos-1.png differ diff --git a/docs/articles/small_model_workflow_files/figure-html/combined_reject_coverage-1.png b/docs/articles/small_model_workflow_files/figure-html/combined_reject_coverage-1.png index 4668080..c6da9ae 100644 Binary files a/docs/articles/small_model_workflow_files/figure-html/combined_reject_coverage-1.png and b/docs/articles/small_model_workflow_files/figure-html/combined_reject_coverage-1.png differ diff --git a/docs/articles/small_model_workflow_files/figure-html/combined_reject_plots-1.png b/docs/articles/small_model_workflow_files/figure-html/combined_reject_plots-1.png index 476acc5..0fc84b5 100644 Binary files a/docs/articles/small_model_workflow_files/figure-html/combined_reject_plots-1.png and b/docs/articles/small_model_workflow_files/figure-html/combined_reject_plots-1.png differ diff --git a/docs/articles/small_model_workflow_files/figure-html/combined_reject_plots-2.png b/docs/articles/small_model_workflow_files/figure-html/combined_reject_plots-2.png index 0517d8a..3ede49f 100644 Binary files a/docs/articles/small_model_workflow_files/figure-html/combined_reject_plots-2.png and b/docs/articles/small_model_workflow_files/figure-html/combined_reject_plots-2.png differ diff --git a/docs/articles/small_model_workflow_files/figure-html/fanos_divergent-1.png b/docs/articles/small_model_workflow_files/figure-html/fanos_divergent-1.png index ce81043..5e55c94 100644 Binary files a/docs/articles/small_model_workflow_files/figure-html/fanos_divergent-1.png and b/docs/articles/small_model_workflow_files/figure-html/fanos_divergent-1.png differ diff --git a/docs/articles/small_model_workflow_files/figure-html/mixture_first_convergence-1.png b/docs/articles/small_model_workflow_files/figure-html/mixture_first_convergence-1.png index 42e9c08..b5ee828 100644 Binary files a/docs/articles/small_model_workflow_files/figure-html/mixture_first_convergence-1.png and b/docs/articles/small_model_workflow_files/figure-html/mixture_first_convergence-1.png differ diff --git a/docs/articles/small_model_workflow_files/figure-html/mixture_fixed_log_mix_pairs-1.png b/docs/articles/small_model_workflow_files/figure-html/mixture_fixed_log_mix_pairs-1.png index 3415b60..474e204 100644 Binary files a/docs/articles/small_model_workflow_files/figure-html/mixture_fixed_log_mix_pairs-1.png and b/docs/articles/small_model_workflow_files/figure-html/mixture_fixed_log_mix_pairs-1.png differ diff --git a/docs/articles/small_model_workflow_files/figure-html/mixture_fixed_log_mix_rhat-1.png b/docs/articles/small_model_workflow_files/figure-html/mixture_fixed_log_mix_rhat-1.png index 2dfee0b..a63980a 100644 Binary files a/docs/articles/small_model_workflow_files/figure-html/mixture_fixed_log_mix_rhat-1.png and b/docs/articles/small_model_workflow_files/figure-html/mixture_fixed_log_mix_rhat-1.png differ diff --git a/docs/articles/small_model_workflow_files/figure-html/mixture_fixed_ordered_pairs-1.png b/docs/articles/small_model_workflow_files/figure-html/mixture_fixed_ordered_pairs-1.png index 907c742..8af3261 100644 Binary files a/docs/articles/small_model_workflow_files/figure-html/mixture_fixed_ordered_pairs-1.png and b/docs/articles/small_model_workflow_files/figure-html/mixture_fixed_ordered_pairs-1.png differ diff --git a/docs/articles/small_model_workflow_files/figure-html/mixture_fixed_ordered_subset_coverage-1.png b/docs/articles/small_model_workflow_files/figure-html/mixture_fixed_ordered_subset_coverage-1.png index 
63d681f..4746a67 100644 Binary files a/docs/articles/small_model_workflow_files/figure-html/mixture_fixed_ordered_subset_coverage-1.png and b/docs/articles/small_model_workflow_files/figure-html/mixture_fixed_ordered_subset_coverage-1.png differ diff --git a/docs/articles/small_model_workflow_files/figure-html/mixture_fixed_ordered_subset_results-1.png b/docs/articles/small_model_workflow_files/figure-html/mixture_fixed_ordered_subset_results-1.png index a8b3345..3ce1133 100644 Binary files a/docs/articles/small_model_workflow_files/figure-html/mixture_fixed_ordered_subset_results-1.png and b/docs/articles/small_model_workflow_files/figure-html/mixture_fixed_ordered_subset_results-1.png differ diff --git a/docs/articles/small_model_workflow_files/figure-html/mixture_fixed_ordered_subset_results-2.png b/docs/articles/small_model_workflow_files/figure-html/mixture_fixed_ordered_subset_results-2.png index 668f83f..8aa67f0 100644 Binary files a/docs/articles/small_model_workflow_files/figure-html/mixture_fixed_ordered_subset_results-2.png and b/docs/articles/small_model_workflow_files/figure-html/mixture_fixed_ordered_subset_results-2.png differ diff --git a/docs/articles/small_model_workflow_files/figure-html/results_beta_precision_100_plots-1.png b/docs/articles/small_model_workflow_files/figure-html/results_beta_precision_100_plots-1.png index 86bcf8d..ac5c3a4 100644 Binary files a/docs/articles/small_model_workflow_files/figure-html/results_beta_precision_100_plots-1.png and b/docs/articles/small_model_workflow_files/figure-html/results_beta_precision_100_plots-1.png differ diff --git a/docs/articles/small_model_workflow_files/figure-html/results_beta_precision_100_plots-2.png b/docs/articles/small_model_workflow_files/figure-html/results_beta_precision_100_plots-2.png index 9ede4d0..d04a838 100644 Binary files a/docs/articles/small_model_workflow_files/figure-html/results_beta_precision_100_plots-2.png and b/docs/articles/small_model_workflow_files/figure-html/results_beta_precision_100_plots-2.png differ diff --git a/docs/articles/small_model_workflow_files/figure-html/results_beta_precision_10_plots-1.png b/docs/articles/small_model_workflow_files/figure-html/results_beta_precision_10_plots-1.png index 46e59ad..5b712df 100644 Binary files a/docs/articles/small_model_workflow_files/figure-html/results_beta_precision_10_plots-1.png and b/docs/articles/small_model_workflow_files/figure-html/results_beta_precision_10_plots-1.png differ diff --git a/docs/articles/small_model_workflow_files/figure-html/results_beta_precision_10_plots-2.png b/docs/articles/small_model_workflow_files/figure-html/results_beta_precision_10_plots-2.png index 264d515..c714373 100644 Binary files a/docs/articles/small_model_workflow_files/figure-html/results_beta_precision_10_plots-2.png and b/docs/articles/small_model_workflow_files/figure-html/results_beta_precision_10_plots-2.png differ diff --git a/docs/articles/small_model_workflow_files/figure-html/results_beta_precision_fixed_prior_200_coverage-1.png b/docs/articles/small_model_workflow_files/figure-html/results_beta_precision_fixed_prior_200_coverage-1.png index 6dfb079..d6f4dec 100644 Binary files a/docs/articles/small_model_workflow_files/figure-html/results_beta_precision_fixed_prior_200_coverage-1.png and b/docs/articles/small_model_workflow_files/figure-html/results_beta_precision_fixed_prior_200_coverage-1.png differ diff --git a/docs/articles/small_model_workflow_files/figure-html/results_beta_precision_fixed_prior_200_plots-1.png 
b/docs/articles/small_model_workflow_files/figure-html/results_beta_precision_fixed_prior_200_plots-1.png index e1711f6..b07ae9e 100644 Binary files a/docs/articles/small_model_workflow_files/figure-html/results_beta_precision_fixed_prior_200_plots-1.png and b/docs/articles/small_model_workflow_files/figure-html/results_beta_precision_fixed_prior_200_plots-1.png differ diff --git a/docs/articles/small_model_workflow_files/figure-html/results_beta_precision_fixed_prior_200_plots-2.png b/docs/articles/small_model_workflow_files/figure-html/results_beta_precision_fixed_prior_200_plots-2.png index 711d4df..1f0b2f1 100644 Binary files a/docs/articles/small_model_workflow_files/figure-html/results_beta_precision_fixed_prior_200_plots-2.png and b/docs/articles/small_model_workflow_files/figure-html/results_beta_precision_fixed_prior_200_plots-2.png differ diff --git a/docs/articles/small_model_workflow_files/figure-html/results_beta_precision_fixed_prior_plots-1.png b/docs/articles/small_model_workflow_files/figure-html/results_beta_precision_fixed_prior_plots-1.png index a5554f8..4ebad80 100644 Binary files a/docs/articles/small_model_workflow_files/figure-html/results_beta_precision_fixed_prior_plots-1.png and b/docs/articles/small_model_workflow_files/figure-html/results_beta_precision_fixed_prior_plots-1.png differ diff --git a/docs/articles/small_model_workflow_files/figure-html/results_beta_precision_fixed_prior_plots-2.png b/docs/articles/small_model_workflow_files/figure-html/results_beta_precision_fixed_prior_plots-2.png index 1057616..e1bdd1f 100644 Binary files a/docs/articles/small_model_workflow_files/figure-html/results_beta_precision_fixed_prior_plots-2.png and b/docs/articles/small_model_workflow_files/figure-html/results_beta_precision_fixed_prior_plots-2.png differ diff --git a/docs/articles/small_model_workflow_files/figure-html/results_combined_plots-1.png b/docs/articles/small_model_workflow_files/figure-html/results_combined_plots-1.png index 52e29d8..d7eee93 100644 Binary files a/docs/articles/small_model_workflow_files/figure-html/results_combined_plots-1.png and b/docs/articles/small_model_workflow_files/figure-html/results_combined_plots-1.png differ diff --git a/docs/articles/small_model_workflow_files/figure-html/results_combined_plots-2.png b/docs/articles/small_model_workflow_files/figure-html/results_combined_plots-2.png index 3a2f67c..94a8cbb 100644 Binary files a/docs/articles/small_model_workflow_files/figure-html/results_combined_plots-2.png and b/docs/articles/small_model_workflow_files/figure-html/results_combined_plots-2.png differ diff --git a/docs/articles/small_model_workflow_files/figure-html/results_combined_reject_more_coverage-1.png b/docs/articles/small_model_workflow_files/figure-html/results_combined_reject_more_coverage-1.png index 2ef410a..a182699 100644 Binary files a/docs/articles/small_model_workflow_files/figure-html/results_combined_reject_more_coverage-1.png and b/docs/articles/small_model_workflow_files/figure-html/results_combined_reject_more_coverage-1.png differ diff --git a/docs/articles/small_model_workflow_files/figure-html/results_combined_reject_more_plots-1.png b/docs/articles/small_model_workflow_files/figure-html/results_combined_reject_more_plots-1.png index b2ffefa..22f80ec 100644 Binary files a/docs/articles/small_model_workflow_files/figure-html/results_combined_reject_more_plots-1.png and b/docs/articles/small_model_workflow_files/figure-html/results_combined_reject_more_plots-1.png differ diff --git 
a/docs/articles/small_model_workflow_files/figure-html/results_combined_reject_more_plots-2.png b/docs/articles/small_model_workflow_files/figure-html/results_combined_reject_more_plots-2.png index ce97422..6dd4cbd 100644 Binary files a/docs/articles/small_model_workflow_files/figure-html/results_combined_reject_more_plots-2.png and b/docs/articles/small_model_workflow_files/figure-html/results_combined_reject_more_plots-2.png differ diff --git a/docs/articles/small_model_workflow_files/figure-html/results_fixed_ordered_combined_results-1.png b/docs/articles/small_model_workflow_files/figure-html/results_fixed_ordered_combined_results-1.png index c65b453..b2c4e4d 100644 Binary files a/docs/articles/small_model_workflow_files/figure-html/results_fixed_ordered_combined_results-1.png and b/docs/articles/small_model_workflow_files/figure-html/results_fixed_ordered_combined_results-1.png differ diff --git a/docs/articles/small_model_workflow_files/figure-html/results_fixed_ordered_combined_results-2.png b/docs/articles/small_model_workflow_files/figure-html/results_fixed_ordered_combined_results-2.png index a91b95f..2dcf52f 100644 Binary files a/docs/articles/small_model_workflow_files/figure-html/results_fixed_ordered_combined_results-2.png and b/docs/articles/small_model_workflow_files/figure-html/results_fixed_ordered_combined_results-2.png differ diff --git a/docs/articles/small_model_workflow_files/figure-html/results_fixed_ordered_combined_results_coverage-1.png b/docs/articles/small_model_workflow_files/figure-html/results_fixed_ordered_combined_results_coverage-1.png index 8acb912..5f6eca3 100644 Binary files a/docs/articles/small_model_workflow_files/figure-html/results_fixed_ordered_combined_results_coverage-1.png and b/docs/articles/small_model_workflow_files/figure-html/results_fixed_ordered_combined_results_coverage-1.png differ diff --git a/docs/articles/small_model_workflow_files/figure-html/sim_estimated_final-1.png b/docs/articles/small_model_workflow_files/figure-html/sim_estimated_final-1.png index 5538e71..e8821c0 100644 Binary files a/docs/articles/small_model_workflow_files/figure-html/sim_estimated_final-1.png and b/docs/articles/small_model_workflow_files/figure-html/sim_estimated_final-1.png differ diff --git a/docs/articles/small_model_workflow_files/header-attrs-2.10/header-attrs.js b/docs/articles/small_model_workflow_files/header-attrs-2.10/header-attrs.js deleted file mode 100644 index dd57d92..0000000 --- a/docs/articles/small_model_workflow_files/header-attrs-2.10/header-attrs.js +++ /dev/null @@ -1,12 +0,0 @@ -// Pandoc 2.9 adds attributes on both header and div. We remove the former (to -// be compatible with the behavior of Pandoc < 2.8). -document.addEventListener('DOMContentLoaded', function(e) { - var hs = document.querySelectorAll("div.section[class*='level'] > :first-child"); - var i, h, a; - for (i = 0; i < hs.length; i++) { - h = hs[i]; - if (!/^h[1-6]$/i.test(h.tagName)) continue; // it should be a header h1-h6 - a = h.attributes; - while (a.length > 0) h.removeAttribute(a[0].name); - } -}); diff --git a/docs/articles/small_model_workflow_files/header-attrs-2.11/header-attrs.js b/docs/articles/small_model_workflow_files/header-attrs-2.11/header-attrs.js deleted file mode 100644 index dd57d92..0000000 --- a/docs/articles/small_model_workflow_files/header-attrs-2.11/header-attrs.js +++ /dev/null @@ -1,12 +0,0 @@ -// Pandoc 2.9 adds attributes on both header and div. We remove the former (to -// be compatible with the behavior of Pandoc < 2.8). 
-document.addEventListener('DOMContentLoaded', function(e) { - var hs = document.querySelectorAll("div.section[class*='level'] > :first-child"); - var i, h, a; - for (i = 0; i < hs.length; i++) { - h = hs[i]; - if (!/^h[1-6]$/i.test(h.tagName)) continue; // it should be a header h1-h6 - a = h.attributes; - while (a.length > 0) h.removeAttribute(a[0].name); - } -}); diff --git a/docs/authors.html b/docs/authors.html index e4a3d9d..659c95c 100644 --- a/docs/authors.html +++ b/docs/authors.html @@ -1,66 +1,12 @@ - - - - - - - -Authors • SBC - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Authors and Citation • SBC + + - - - - - -
-
-
- -
+
- @@ -182,22 +145,20 @@

Authors

-
- +
- - + + diff --git a/docs/bootstrap-toc.css b/docs/bootstrap-toc.css deleted file mode 100644 index 5a85941..0000000 --- a/docs/bootstrap-toc.css +++ /dev/null @@ -1,60 +0,0 @@ -/*! - * Bootstrap Table of Contents v0.4.1 (http://afeld.github.io/bootstrap-toc/) - * Copyright 2015 Aidan Feldman - * Licensed under MIT (https://github.com/afeld/bootstrap-toc/blob/gh-pages/LICENSE.md) */ - -/* modified from https://github.com/twbs/bootstrap/blob/94b4076dd2efba9af71f0b18d4ee4b163aa9e0dd/docs/assets/css/src/docs.css#L548-L601 */ - -/* All levels of nav */ -nav[data-toggle='toc'] .nav > li > a { - display: block; - padding: 4px 20px; - font-size: 13px; - font-weight: 500; - color: #767676; -} -nav[data-toggle='toc'] .nav > li > a:hover, -nav[data-toggle='toc'] .nav > li > a:focus { - padding-left: 19px; - color: #563d7c; - text-decoration: none; - background-color: transparent; - border-left: 1px solid #563d7c; -} -nav[data-toggle='toc'] .nav > .active > a, -nav[data-toggle='toc'] .nav > .active:hover > a, -nav[data-toggle='toc'] .nav > .active:focus > a { - padding-left: 18px; - font-weight: bold; - color: #563d7c; - background-color: transparent; - border-left: 2px solid #563d7c; -} - -/* Nav: second level (shown on .active) */ -nav[data-toggle='toc'] .nav .nav { - display: none; /* Hide by default, but at >768px, show it */ - padding-bottom: 10px; -} -nav[data-toggle='toc'] .nav .nav > li > a { - padding-top: 1px; - padding-bottom: 1px; - padding-left: 30px; - font-size: 12px; - font-weight: normal; -} -nav[data-toggle='toc'] .nav .nav > li > a:hover, -nav[data-toggle='toc'] .nav .nav > li > a:focus { - padding-left: 29px; -} -nav[data-toggle='toc'] .nav .nav > .active > a, -nav[data-toggle='toc'] .nav .nav > .active:hover > a, -nav[data-toggle='toc'] .nav .nav > .active:focus > a { - padding-left: 28px; - font-weight: 500; -} - -/* from https://github.com/twbs/bootstrap/blob/e38f066d8c203c3e032da0ff23cd2d6098ee2dd6/docs/assets/css/src/docs.css#L631-L634 */ -nav[data-toggle='toc'] .nav > .active > ul { - display: block; -} diff --git a/docs/bootstrap-toc.js b/docs/bootstrap-toc.js deleted file mode 100644 index 1cdd573..0000000 --- a/docs/bootstrap-toc.js +++ /dev/null @@ -1,159 +0,0 @@ -/*! 
- * Bootstrap Table of Contents v0.4.1 (http://afeld.github.io/bootstrap-toc/) - * Copyright 2015 Aidan Feldman - * Licensed under MIT (https://github.com/afeld/bootstrap-toc/blob/gh-pages/LICENSE.md) */ -(function() { - 'use strict'; - - window.Toc = { - helpers: { - // return all matching elements in the set, or their descendants - findOrFilter: function($el, selector) { - // http://danielnouri.org/notes/2011/03/14/a-jquery-find-that-also-finds-the-root-element/ - // http://stackoverflow.com/a/12731439/358804 - var $descendants = $el.find(selector); - return $el.filter(selector).add($descendants).filter(':not([data-toc-skip])'); - }, - - generateUniqueIdBase: function(el) { - var text = $(el).text(); - var anchor = text.trim().toLowerCase().replace(/[^A-Za-z0-9]+/g, '-'); - return anchor || el.tagName.toLowerCase(); - }, - - generateUniqueId: function(el) { - var anchorBase = this.generateUniqueIdBase(el); - for (var i = 0; ; i++) { - var anchor = anchorBase; - if (i > 0) { - // add suffix - anchor += '-' + i; - } - // check if ID already exists - if (!document.getElementById(anchor)) { - return anchor; - } - } - }, - - generateAnchor: function(el) { - if (el.id) { - return el.id; - } else { - var anchor = this.generateUniqueId(el); - el.id = anchor; - return anchor; - } - }, - - createNavList: function() { - return $(''); - }, - - createChildNavList: function($parent) { - var $childList = this.createNavList(); - $parent.append($childList); - return $childList; - }, - - generateNavEl: function(anchor, text) { - var $a = $(''); - $a.attr('href', '#' + anchor); - $a.text(text); - var $li = $('
  • '); - $li.append($a); - return $li; - }, - - generateNavItem: function(headingEl) { - var anchor = this.generateAnchor(headingEl); - var $heading = $(headingEl); - var text = $heading.data('toc-text') || $heading.text(); - return this.generateNavEl(anchor, text); - }, - - // Find the first heading level (`

    `, then `

    `, etc.) that has more than one element. Defaults to 1 (for `

    `). - getTopLevel: function($scope) { - for (var i = 1; i <= 6; i++) { - var $headings = this.findOrFilter($scope, 'h' + i); - if ($headings.length > 1) { - return i; - } - } - - return 1; - }, - - // returns the elements for the top level, and the next below it - getHeadings: function($scope, topLevel) { - var topSelector = 'h' + topLevel; - - var secondaryLevel = topLevel + 1; - var secondarySelector = 'h' + secondaryLevel; - - return this.findOrFilter($scope, topSelector + ',' + secondarySelector); - }, - - getNavLevel: function(el) { - return parseInt(el.tagName.charAt(1), 10); - }, - - populateNav: function($topContext, topLevel, $headings) { - var $context = $topContext; - var $prevNav; - - var helpers = this; - $headings.each(function(i, el) { - var $newNav = helpers.generateNavItem(el); - var navLevel = helpers.getNavLevel(el); - - // determine the proper $context - if (navLevel === topLevel) { - // use top level - $context = $topContext; - } else if ($prevNav && $context === $topContext) { - // create a new level of the tree and switch to it - $context = helpers.createChildNavList($prevNav); - } // else use the current $context - - $context.append($newNav); - - $prevNav = $newNav; - }); - }, - - parseOps: function(arg) { - var opts; - if (arg.jquery) { - opts = { - $nav: arg - }; - } else { - opts = arg; - } - opts.$scope = opts.$scope || $(document.body); - return opts; - } - }, - - // accepts a jQuery object, or an options object - init: function(opts) { - opts = this.helpers.parseOps(opts); - - // ensure that the data attribute is in place for styling - opts.$nav.attr('data-toggle', 'toc'); - - var $topContext = this.helpers.createChildNavList(opts.$nav); - var topLevel = this.helpers.getTopLevel(opts.$scope); - var $headings = this.helpers.getHeadings(opts.$scope, topLevel); - this.helpers.populateNav($topContext, topLevel, $headings); - } - }; - - $(function() { - $('nav[data-toggle="toc"]').each(function(i, el) { - var $nav = $(el); - Toc.init($nav); - }); - }); -})(); diff --git a/docs/docsearch.css b/docs/docsearch.css deleted file mode 100644 index e5f1fe1..0000000 --- a/docs/docsearch.css +++ /dev/null @@ -1,148 +0,0 @@ -/* Docsearch -------------------------------------------------------------- */ -/* - Source: https://github.com/algolia/docsearch/ - License: MIT -*/ - -.algolia-autocomplete { - display: block; - -webkit-box-flex: 1; - -ms-flex: 1; - flex: 1 -} - -.algolia-autocomplete .ds-dropdown-menu { - width: 100%; - min-width: none; - max-width: none; - padding: .75rem 0; - background-color: #fff; - background-clip: padding-box; - border: 1px solid rgba(0, 0, 0, .1); - box-shadow: 0 .5rem 1rem rgba(0, 0, 0, .175); -} - -@media (min-width:768px) { - .algolia-autocomplete .ds-dropdown-menu { - width: 175% - } -} - -.algolia-autocomplete .ds-dropdown-menu::before { - display: none -} - -.algolia-autocomplete .ds-dropdown-menu [class^=ds-dataset-] { - padding: 0; - background-color: rgb(255,255,255); - border: 0; - max-height: 80vh; -} - -.algolia-autocomplete .ds-dropdown-menu .ds-suggestions { - margin-top: 0 -} - -.algolia-autocomplete .algolia-docsearch-suggestion { - padding: 0; - overflow: visible -} - -.algolia-autocomplete .algolia-docsearch-suggestion--category-header { - padding: .125rem 1rem; - margin-top: 0; - font-size: 1.3em; - font-weight: 500; - color: #00008B; - border-bottom: 0 -} - -.algolia-autocomplete .algolia-docsearch-suggestion--wrapper { - float: none; - padding-top: 0 -} - -.algolia-autocomplete 
.algolia-docsearch-suggestion--subcategory-column { - float: none; - width: auto; - padding: 0; - text-align: left -} - -.algolia-autocomplete .algolia-docsearch-suggestion--content { - float: none; - width: auto; - padding: 0 -} - -.algolia-autocomplete .algolia-docsearch-suggestion--content::before { - display: none -} - -.algolia-autocomplete .ds-suggestion:not(:first-child) .algolia-docsearch-suggestion--category-header { - padding-top: .75rem; - margin-top: .75rem; - border-top: 1px solid rgba(0, 0, 0, .1) -} - -.algolia-autocomplete .ds-suggestion .algolia-docsearch-suggestion--subcategory-column { - display: block; - padding: .1rem 1rem; - margin-bottom: 0.1; - font-size: 1.0em; - font-weight: 400 - /* display: none */ -} - -.algolia-autocomplete .algolia-docsearch-suggestion--title { - display: block; - padding: .25rem 1rem; - margin-bottom: 0; - font-size: 0.9em; - font-weight: 400 -} - -.algolia-autocomplete .algolia-docsearch-suggestion--text { - padding: 0 1rem .5rem; - margin-top: -.25rem; - font-size: 0.8em; - font-weight: 400; - line-height: 1.25 -} - -.algolia-autocomplete .algolia-docsearch-footer { - width: 110px; - height: 20px; - z-index: 3; - margin-top: 10.66667px; - float: right; - font-size: 0; - line-height: 0; -} - -.algolia-autocomplete .algolia-docsearch-footer--logo { - background-image: url("data:image/svg+xml;utf8,"); - background-repeat: no-repeat; - background-position: 50%; - background-size: 100%; - overflow: hidden; - text-indent: -9000px; - width: 100%; - height: 100%; - display: block; - transform: translate(-8px); -} - -.algolia-autocomplete .algolia-docsearch-suggestion--highlight { - color: #FF8C00; - background: rgba(232, 189, 54, 0.1) -} - - -.algolia-autocomplete .algolia-docsearch-suggestion--text .algolia-docsearch-suggestion--highlight { - box-shadow: inset 0 -2px 0 0 rgba(105, 105, 105, .5) -} - -.algolia-autocomplete .ds-suggestion.ds-cursor .algolia-docsearch-suggestion--content { - background-color: rgba(192, 192, 192, .15) -} diff --git a/docs/docsearch.js b/docs/docsearch.js deleted file mode 100644 index b35504c..0000000 --- a/docs/docsearch.js +++ /dev/null @@ -1,85 +0,0 @@ -$(function() { - - // register a handler to move the focus to the search bar - // upon pressing shift + "/" (i.e. "?") - $(document).on('keydown', function(e) { - if (e.shiftKey && e.keyCode == 191) { - e.preventDefault(); - $("#search-input").focus(); - } - }); - - $(document).ready(function() { - // do keyword highlighting - /* modified from https://jsfiddle.net/julmot/bL6bb5oo/ */ - var mark = function() { - - var referrer = document.URL ; - var paramKey = "q" ; - - if (referrer.indexOf("?") !== -1) { - var qs = referrer.substr(referrer.indexOf('?') + 1); - var qs_noanchor = qs.split('#')[0]; - var qsa = qs_noanchor.split('&'); - var keyword = ""; - - for (var i = 0; i < qsa.length; i++) { - var currentParam = qsa[i].split('='); - - if (currentParam.length !== 2) { - continue; - } - - if (currentParam[0] == paramKey) { - keyword = decodeURIComponent(currentParam[1].replace(/\+/g, "%20")); - } - } - - if (keyword !== "") { - $(".contents").unmark({ - done: function() { - $(".contents").mark(keyword); - } - }); - } - } - }; - - mark(); - }); -}); - -/* Search term highlighting ------------------------------*/ - -function matchedWords(hit) { - var words = []; - - var hierarchy = hit._highlightResult.hierarchy; - // loop to fetch from lvl0, lvl1, etc. 
- for (var idx in hierarchy) { - words = words.concat(hierarchy[idx].matchedWords); - } - - var content = hit._highlightResult.content; - if (content) { - words = words.concat(content.matchedWords); - } - - // return unique words - var words_uniq = [...new Set(words)]; - return words_uniq; -} - -function updateHitURL(hit) { - - var words = matchedWords(hit); - var url = ""; - - if (hit.anchor) { - url = hit.url_without_anchor + '?q=' + escape(words.join(" ")) + '#' + hit.anchor; - } else { - url = hit.url + '?q=' + escape(words.join(" ")); - } - - return url; -} diff --git a/docs/favicon-16x16.png b/docs/favicon-16x16.png deleted file mode 100644 index 4babbe0..0000000 Binary files a/docs/favicon-16x16.png and /dev/null differ diff --git a/docs/favicon-32x32.png b/docs/favicon-32x32.png deleted file mode 100644 index 7048cdd..0000000 Binary files a/docs/favicon-32x32.png and /dev/null differ diff --git a/docs/favicon.ico b/docs/favicon.ico deleted file mode 100644 index 68b8dfd..0000000 Binary files a/docs/favicon.ico and /dev/null differ diff --git a/docs/index.html b/docs/index.html index a6fddd6..9d09310 100644 --- a/docs/index.html +++ b/docs/index.html @@ -12,14 +12,18 @@ - + + +
    -
    - -
    -

    -Efficient simulation-based calibration for Bayesian models

    -

    SBC provides tools to easily validate and offer corrections on prior, likelihood, and computation algorithms based on the self-recovering property of Bayesian models. This package contains tools such as SBC rank histograms, ECDF plots, and their summary statistics which can be used to assess computational faithfulness.

    -
    -

    -Varieties of calibrations: scope of this package

    -

    Calibration (i.e. reliability) is not a sufficient condition for a good forecast but a minimal property that any forecast should satisfy (FV1998). It serves as a bootstrap for model development and its method and target varies. Target is chosen as modeler’s quantity of interest and directly affects the calibrated result as reward in reinforcement learning. Method depends on how much you marginalized or conditioned the full joint space to test coverage. Scope of this package is checked below.

    -
    -
    -
    -

    -Interface and Usage

    -

    SBC is designed to be primarily used with Stan models, offering a highly customizable interface to integrate Simulation Based Calibration into existing Bayesian workflows with minimal effort. Its main feature is the api interface, which defines a fully-blown SBC pipeline starting from dataset generation to posterior sampling. Once a user has a valid Stan model and a minor R function defining the data generating process(referred to as Generator), running SBC becomes as simple as:

    -
    n_datasets <- 100  # Number of SBC iterations to run
    -
    -sbc_generator <- SBC::function_SBC_generator(Generator)
    -sbc_dataset <- SBC::generate_datasets(
    -  sbc_generator, 
    -  n_datasets)
    -
    -cmdstan_backend <- SBC::cmdstan_sample_SBC_backend(
    -    cmdstan_model, iter_warmup = 1000, iter_sampling = 1000)
    -    
    -results <- SBC::compute_results(sbc_dataset, cmdstan_backend)
    -plot_rank_hist(results)
    -

    For detailed usage, please refer to the included vignettes.

    -
    -

    -Compatibility

    -

    Currently SBC supports cmdstan, rstan, and brms models out of the box. However, adding Backends for other platforms is supported.

    -
    -
    -
    -

    -Installation

    +
    + +

    SBC provides tools to validate your Bayesian model and/or a sampling algorithm via the self-recovering property of Bayesian models. This package lets you run SBC easily and perform postprocessing and visualisations of the results to assess computational faithfulness.

    +
    +

    Installation +

    To install the development version of SBC, run

    -
    devtools::install_github("hyunjimoon/SBC")
    -

    from your R console.

    -
    -

    -References:

    -

    Theoretical support

    +
    +devtools::install_github("hyunjimoon/SBC")
    +
    +
    +

    Quick tour +

    +

    To use SBC, you need a piece of code that generates simulated data that should match your model (a generator) and a statistical model + algorithm + algorithm parameters that can fit the model to data (a backend). SBC then lets you discover when the backend and generator don’t encode the same data generating process (up to certain limitations).

    +

    For a quick example, we’ll use a simple generator producing normally-distributed data (basically y <- rnorm(N, mu, sigma)) with a backend in Stan that mismatches the generator by wrongly assuming Stan parametrizes the normal distribution via precision (i.e. it has y ~ normal(mu, 1 / sigma ^ 2)).

    +
    +library(SBC)
    +gen <- SBC_example_generator("normal")
    +# interface = "cmdstanr" or "rjags" is also supported
    +backend_bad <- SBC_example_backend("normal_bad", interface = "rstan")
    +

Note: when using the cmdstanr interface, a small number of rejected steps will be reported. These are false positives and do not threaten validity (they happen during warmup); they are a result of difficulties in parsing the output of cmdstanr. We are working on a resolution.

    +

    You can use SBC_print_example_model("normal_bad") to inspect the model used.
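For example, to print the Stan code of the mismatched model:

SBC_print_example_model("normal_bad")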

    +

    We generate 50 simulated datasets and perform SBC:

    +
    +ds <- generate_datasets(gen, n_sims = 50)
    +results_bad <- compute_SBC(ds, backend_bad)
    +

The results then give us diagnostic plots that immediately show a problem: the distribution of SBC ranks is not uniform, as witnessed both by the rank histogram and by the sample ECDF deviating from the theoretical CDF more than expected.

    +
    +plot_rank_hist(results_bad)
    +plot_ecdf_diff(results_bad)
    +

    We can then run SBC with a backend that uses the correct parametrization (i.e. with y ~ normal(mu, sigma)):

    +
    +backend_sd <- SBC_example_backend("normal_sd", interface = "rstan")
    +results_sd <- compute_SBC(ds, backend_sd)
    +
    +plot_rank_hist(results_sd)
    +plot_ecdf_diff(results_sd)
    +

The diagnostic plots show no problems in this case. As with any other software test, we can observe clear failures, but absence of failures does not imply correctness. We can, however, make the SBC check more thorough by running a larger number of simulations and by including suitable generated quantities to guard against known limitations of vanilla SBC, as sketched below.
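A hedged sketch of such an extension (not taken from the README: it assumes the example generator exposes y, N, mu and sigma as above, that generated_quantities() accepts named expressions evaluated against the fitted variables and the data, and that compute_SBC() forwards them via a gen_quants argument, as SBC_statistics_from_single_fit() does):

# Sketch only -- the gen_quants argument and the log_lik expression are illustrative
log_lik_gq <- generated_quantities(
  log_lik = sum(dnorm(y, mu, sigma, log = TRUE))
)
results_gq <- compute_SBC(ds, backend_sd, gen_quants = log_lik_gq)
plot_rank_hist(results_gq)
plot_ecdf_diff(results_gq)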

    +
    +
    +

Parallelization +

    +

    The examples above are very fast to compute, but in real use cases, you almost certainly want to let the computation run in parallel via the future package.

    +
    +library(future)
    +plan(multisession)
    +
    +
    +

    More information +

    +

    The package vignettes provide additional context and examples. Notably:

    +

    Currently SBC supports cmdstanr, rstan, and brms models out of the box. With a little additional work, you can integrate SBC with any exact or approximate fitting method as shown in the Implementing backends vignette.

    +
    +
    +

    References +

    +
    -
    -
    -

    -FAQ

    +
    +

    FAQ +

    How does calibration relate to prediction accuracy?

Comparing the ground truth with the simulated result is the backbone of calibration, and the comparison target greatly affects the calibrated (i.e. trained) result, similar to the reward in reinforcement learning. In this sense, if the U(a(y), theta) term is designed for prediction, the model will be calibrated to achieve the best predictive performance possible.

    +
    +

    Acknowledgements +

    +

Development of this package was supported by the ELIXIR CZ research infrastructure project (Ministry of Education, Youth and Sports of the Czech Republic, Grant No: LM2018131), including access to computing and storage facilities.

    +
    -

    Site built with pkgdown 1.6.1.

    +

    +

    Site built with pkgdown 2.0.1.

    @@ -243,5 +287,7 @@

    Developers

    + + diff --git a/docs/link.svg b/docs/link.svg deleted file mode 100644 index 88ad827..0000000 --- a/docs/link.svg +++ /dev/null @@ -1,12 +0,0 @@ - - - - - - diff --git a/docs/pkgdown.css b/docs/pkgdown.css index 1273238..80ea5b8 100644 --- a/docs/pkgdown.css +++ b/docs/pkgdown.css @@ -56,8 +56,10 @@ img.icon { float: right; } -img { +/* Ensure in-page images don't run outside their container */ +.contents img { max-width: 100%; + height: auto; } /* Fix bug in bootstrap (only seen in firefox) */ @@ -78,11 +80,10 @@ dd { /* Section anchors ---------------------------------*/ a.anchor { - margin-left: -30px; - display:inline-block; - width: 30px; - height: 30px; - visibility: hidden; + display: none; + margin-left: 5px; + width: 20px; + height: 20px; background-image: url(./link.svg); background-repeat: no-repeat; @@ -90,17 +91,15 @@ a.anchor { background-position: center center; } -.hasAnchor:hover a.anchor { - visibility: visible; -} - -@media (max-width: 767px) { - .hasAnchor:hover a.anchor { - visibility: hidden; - } +h1:hover .anchor, +h2:hover .anchor, +h3:hover .anchor, +h4:hover .anchor, +h5:hover .anchor, +h6:hover .anchor { + display: inline-block; } - /* Fixes for fixed navbar --------------------------*/ .contents h1, .contents h2, .contents h3, .contents h4 { @@ -264,31 +263,26 @@ table { /* Syntax highlighting ---------------------------------------------------- */ -pre { - word-wrap: normal; - word-break: normal; - border: 1px solid #eee; -} - -pre, code { +pre, code, pre code { background-color: #f8f8f8; color: #333; } +pre, pre code { + white-space: pre-wrap; + word-break: break-all; + overflow-wrap: break-word; +} -pre code { - overflow: auto; - word-wrap: normal; - white-space: pre; +pre { + border: 1px solid #eee; } -pre .img { +pre .img, pre .r-plt { margin: 5px 0; } -pre .img img { +pre .img img, pre .r-plt img { background-color: #fff; - display: block; - height: auto; } code a, pre a { @@ -305,9 +299,8 @@ a.sourceLine:hover { .kw {color: #264D66;} /* keyword */ .co {color: #888888;} /* comment */ -.message { color: black; font-weight: bolder;} -.error { color: orange; font-weight: bolder;} -.warning { color: #6A0366; font-weight: bolder;} +.error {font-weight: bolder;} +.warning {font-weight: bolder;} /* Clipboard --------------------------*/ @@ -365,3 +358,27 @@ mark { content: ""; } } + +/* Section anchors --------------------------------- + Added in pandoc 2.11: https://github.com/jgm/pandoc-templates/commit/9904bf71 +*/ + +div.csl-bib-body { } +div.csl-entry { + clear: both; +} +.hanging-indent div.csl-entry { + margin-left:2em; + text-indent:-2em; +} +div.csl-left-margin { + min-width:2em; + float:left; +} +div.csl-right-inline { + margin-left:2em; + padding-left:1em; +} +div.csl-indent { + margin-left: 2em; +} diff --git a/docs/pkgdown.js b/docs/pkgdown.js index 7e7048f..6f0eee4 100644 --- a/docs/pkgdown.js +++ b/docs/pkgdown.js @@ -80,7 +80,7 @@ $(document).ready(function() { var copyButton = ""; - $(".examples, div.sourceCode").addClass("hasCopyButton"); + $("div.sourceCode").addClass("hasCopyButton"); // Insert copy buttons: $(copyButton).prependTo(".hasCopyButton"); @@ -91,7 +91,7 @@ // Initialize clipboard: var clipboardBtnCopies = new ClipboardJS('[data-clipboard-copy]', { text: function(trigger) { - return trigger.parentNode.textContent; + return trigger.parentNode.textContent.replace(/\n#>[^\n]*/g, ""); } }); diff --git a/docs/pkgdown.yml b/docs/pkgdown.yml index 6311041..6c7270d 100644 --- a/docs/pkgdown.yml +++ b/docs/pkgdown.yml @@ -1,15 +1,20 @@ 
pandoc: 2.11.4 -pkgdown: 1.6.1 +pkgdown: 2.0.1 pkgdown_sha: ~ articles: bad_parametrization: bad_parametrization.html - basic_usage: basic_usage.html brms: brms.html - discrete_params: discrete_params.html + computational_algorithm1: computational_algorithm1.html + discrete_vars: discrete_vars.html implementing_backends: implementing_backends.html indexing: indexing.html limits_of_SBC: limits_of_SBC.html + rank_visualizations: rank_visualizations.html rejection_sampling: rejection_sampling.html + SBC: SBC.html small_model_workflow: small_model_workflow.html -last_built: 2021-09-26T14:43Z +last_built: 2022-02-17T10:05Z +urls: + reference: https://hyunjimoon.github.io/SBC/reference + article: https://hyunjimoon.github.io/SBC/articles diff --git a/docs/reference/ECDF-plots.html b/docs/reference/ECDF-plots.html index 099e755..764b7b0 100644 --- a/docs/reference/ECDF-plots.html +++ b/docs/reference/ECDF-plots.html @@ -1,67 +1,14 @@ - - - - - - - -Plot the ECDF-based plots. — plot_ecdf • SBC - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Plot the ECDF-based plots. — plot_ecdf • SBC - - - - + + -
    -
    - -
    - -
    +
    -

    See the methods for data_for_ecdf_plots() for available data formats.

    +

    See vignette("rank_visualizations") for +more details. +See the methods for data_for_ecdf_plots() for available data formats.

    -
    plot_ecdf(
    -  x,
    -  parameters = NULL,
    -  K = NULL,
    -  gamma = NULL,
    -  prob = 0.95,
    -  size = 1,
    -  alpha = 0.33,
    -  ...
    -)
    -
    -plot_ecdf_diff(
    -  x,
    -  parameters = NULL,
    -  K = NULL,
    -  gamma = NULL,
    -  prob = 0.95,
    -  size = 1,
    -  alpha = 0.33,
    -  ...
    -)
    +
    +
    plot_ecdf(
    +  x,
    +  variables = NULL,
    +  K = NULL,
    +  gamma = NULL,
    +  prob = 0.95,
    +  size = 1,
    +  alpha = 0.33,
    +  ...,
    +  parameters = NULL
    +)
    +
    +plot_ecdf_diff(
    +  x,
    +  variables = NULL,
    +  K = NULL,
    +  gamma = NULL,
    +  prob = 0.95,
    +  size = 1,
    +  alpha = 0.33,
    +  ...,
    +  parameters = NULL
    +)
    +
    -

    Arguments

    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    x

    object supporting the data_for_ecdf_plots() method.

    parameters

    optional subset of parameters to show in the plot

    K

    number of uniformly spaced evaluation points for the ECDF or ECDFs. Affects +

    +

    Arguments

    +
    x
    +

    object supporting the data_for_ecdf_plots() method.

    +
    variables
    +

    optional subset of variables to show in the plot

    +
    K
    +

    number of uniformly spaced evaluation points for the ECDF or ECDFs. Affects the granularity of the plot and can significantly speed up the computation of the simultaneous confidence bands. Defaults to the smaller of number of -ranks per parameter and the maximum rank.

    gamma

    TODO

    prob

    the width of the plotted confidence interval for the ECDF.

    size

    size passed to ggplot2::geom_ribbon() for the confidence band

    alpha

    alpha level of the confidence band

    ...

    additional arguments passed to data_for_ecdf_plots(). -Most notably, if x is matrix, a max_rank parameter needs to be given.

    - -

    Details

    - -

    arxiv::1903.08008 by A. Vehtari et al.

    -

    See also

    - - +ranks per variable and the maximum rank.

    +
    gamma
    +

    TODO

    +
    prob
    +

    the width of the plotted confidence interval for the ECDF.

    +
    size
    +

    size passed to ggplot2::geom_ribbon() for the confidence band

    +
    alpha
    +

    alpha level of the confidence band

    +
    ...
    +

    additional arguments passed to data_for_ecdf_plots(). +Most notably, if x is matrix, a max_rank parameter needs to be given.

    +
    parameters
    +

    DEPRECATED, use variables instead.

    +
    +
    +

    Details

    +

    arxiv::1903.08008 by A. Vehtari et al.

    +
    +
    +

    See also

    + +
    +
    -
    - +
    - - + + diff --git a/docs/reference/SBC-deprecated.html b/docs/reference/SBC-deprecated.html new file mode 100644 index 0000000..e2c2372 --- /dev/null +++ b/docs/reference/SBC-deprecated.html @@ -0,0 +1,158 @@ + +Deprecated functions in package SBC. — SBC-deprecated • SBC + + +
    +
    + + + +
    +
    + + +
    +

    The functions listed below are deprecated and will be defunct in +the near future. When possible, alternative functions with similar +functionality are also mentioned. Help pages for deprecated functions are +available at help("<function>-deprecated").

    +
    + +
    +
    compute_results(...)
    +
    +recompute_statistics(...)
    +
    + +
    +

    compute_results

    + + +

    Instead of compute_results, use compute_SBC.

    +
    +
    +

    recompute_statistics

    + + +

    Instead of recompute_statistics, use recompute_SBC_statistics.

    +
    + +
    + +
    + + +
    + +
    +

    Site built with pkgdown 2.0.1.

    +
    + +
    + + + + + + + + diff --git a/docs/reference/SBC_backend_brms.html b/docs/reference/SBC_backend_brms.html index e7993ff..d527ab3 100644 --- a/docs/reference/SBC_backend_brms.html +++ b/docs/reference/SBC_backend_brms.html @@ -1,67 +1,12 @@ - - - - - - - -Build a backend based on the brms package. — SBC_backend_brms • SBC - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Build a backend based on the brms package. — SBC_backend_brms • SBC - - - - + + -
    -
    - -
    - -
    +
    @@ -168,47 +107,42 @@

    Build a backend based on the brms package.

    Build a backend based on the brms package.

    -
    SBC_backend_brms(..., template_dataset)
    - -

    Arguments

    - - - - - - - - - - -
    ...

    arguments passed to brm.

    template_dataset

    a representative dataset that can be used to generate code.

    +
    +
    SBC_backend_brms(..., template_data, template_dataset = NULL)
    +
    +
    +

    Arguments

    +
    ...
    +

    arguments passed to brm.

    +
    template_data
    +

    a representative value for the data argument in brm +that can be used to generate code.

    +
    template_dataset
    +

    DEPRECATED. Use template_data

    +
    +
    -
    - +
    - - + + diff --git a/docs/reference/SBC_backend_brms_from_generator.html b/docs/reference/SBC_backend_brms_from_generator.html index 5b7ec54..760a214 100644 --- a/docs/reference/SBC_backend_brms_from_generator.html +++ b/docs/reference/SBC_backend_brms_from_generator.html @@ -1,70 +1,15 @@ - - - - - - - -Build a brms backend, reusing the compiled model from a previously created SBC_generator_brms -object. — SBC_backend_brms_from_generator • SBC - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Build a brms backend, reusing the compiled model from a previously created SBC_generator_brms +object. — SBC_backend_brms_from_generator • SBC - - - - + + -
    -
    - -
    - -
    +
    @@ -173,35 +112,32 @@

    Build a brms backend, reusing the compiled model from a previously created < object.

    -
    SBC_backend_brms_from_generator(generator, ...)
    - +
    +
    SBC_backend_brms_from_generator(generator, ...)
    +
    +
    -
    - +
    - - + + diff --git a/docs/reference/SBC_backend_cmdstan_sample.html b/docs/reference/SBC_backend_cmdstan_sample.html index 6973077..672563b 100644 --- a/docs/reference/SBC_backend_cmdstan_sample.html +++ b/docs/reference/SBC_backend_cmdstan_sample.html @@ -1,67 +1,12 @@ - - - - - - - -Backend based on sampling via cmdstanr. — SBC_backend_cmdstan_sample • SBC - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Backend based on sampling via cmdstanr. — SBC_backend_cmdstan_sample • SBC - - - - + + -
    -
    - -
    - -
    +
    @@ -168,49 +107,41 @@

    Backend based on sampling via cmdstanr.

    Backend based on sampling via cmdstanr.

    -
    SBC_backend_cmdstan_sample(model, ...)
    - -

    Arguments

    - - - - - - - - - - -
    model

    an object of class CmdStanModel (as created by cmdstanr::cmdstan_model)

    ...

    other arguments passed to the $sample() method of the model. The data and -parallel_chains arguments cannot be set this way as they need to be controlled by the SBC -package.

    +
    +
    SBC_backend_cmdstan_sample(model, ...)
    +
    +
    +

    Arguments

    +
    model
    +

    an object of class CmdStanModel (as created by cmdstanr::cmdstan_model)

    +
    ...
    +

    other arguments passed to the $sample() method of the model. The data and +parallel_chains arguments cannot be set this way as they need to be controlled by the SBC +package.

    +
    +
    -
    - +
    - - + + diff --git a/docs/reference/SBC_backend_cmdstan_variational.html b/docs/reference/SBC_backend_cmdstan_variational.html index 117551c..f42b0c3 100644 --- a/docs/reference/SBC_backend_cmdstan_variational.html +++ b/docs/reference/SBC_backend_cmdstan_variational.html @@ -1,67 +1,12 @@ - - - - - - - -Backend based on variational approximation via cmdstanr. — SBC_backend_cmdstan_variational • SBC - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Backend based on variational approximation via cmdstanr. — SBC_backend_cmdstan_variational • SBC - - - - + + -
    -
    - -
    - -
    +
    @@ -168,49 +107,46 @@

    Backend based on variational approximation via cmdstanr.

    Backend based on variational approximation via cmdstanr.

    -
    SBC_backend_cmdstan_variational(model, ...)
    - -

    Arguments

    - - - - - - - - - - -
    model

    an object of class CmdStanModel (as created by cmdstanr::cmdstan_model)

    ...

    other arguments passed to the $variational() method of the model. The data and -parallel_chains arguments cannot be set this way as they need to be controlled by the SBC -package.

    +
    +
    SBC_backend_cmdstan_variational(model, ..., n_retries_init = 1)
    +
    +
    +

    Arguments

    +
    model
    +

    an object of class CmdStanModel (as created by cmdstanr::cmdstan_model)

    +
    ...
    +

other arguments passed to the $variational() method of the model. +The data argument cannot be set this way as it needs to be controlled by the SBC +package.

    +
    n_retries_init
    +

    number of times to retry the variational fit if the algorithm +has trouble initializing (e.g. too many dropped evaluations +(see https://discourse.mc-stan.org/t/advi-too-many-dropped-evaluations-even-for-well-behaved-models/24338), +or "cannot compute ELBO using the initial variational distribution")

    +
    +
    -
    - +

    - - + + diff --git a/docs/reference/SBC_backend_default_thin_ranks.html b/docs/reference/SBC_backend_default_thin_ranks.html index c78eadd..2d5ffbb 100644 --- a/docs/reference/SBC_backend_default_thin_ranks.html +++ b/docs/reference/SBC_backend_default_thin_ranks.html @@ -1,68 +1,13 @@ - - - - - - - -S3 generic to get backend-specific default thinning for rank computation. — SBC_backend_default_thin_ranks • SBC - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -S3 generic to get backend-specific default thinning for rank computation. — SBC_backend_default_thin_ranks • SBC - - - - + + -
    -
    - -
    - -
    +

    The default implementation plays it relatively safe and returns 10, unless -SBC_backend_iid_samples() returns TRUE in which case it returns 1.

    +SBC_backend_iid_draws() returns TRUE in which case it returns 1.

    -
    SBC_backend_default_thin_ranks(backend)
    -
    -# S3 method for default
    -SBC_backend_default_thin_ranks(backend)
    +
    +
    SBC_backend_default_thin_ranks(backend)
     
    +# S3 method for default
    +SBC_backend_default_thin_ranks(backend)
    +
    +
    -
    - +
    - - + + diff --git a/docs/reference/SBC_backend_hash_for_cache.html b/docs/reference/SBC_backend_hash_for_cache.html index e913a03..8217ab2 100644 --- a/docs/reference/SBC_backend_hash_for_cache.html +++ b/docs/reference/SBC_backend_hash_for_cache.html @@ -1,68 +1,13 @@ - - - - - - - -Get hash used to identify cached results. — SBC_backend_hash_for_cache • SBC - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Get hash used to identify cached results. — SBC_backend_hash_for_cache • SBC - - - - + + -
    -
    - -
    - -
    +
    -

    S3 generic that allows backends to override how a hash is computed. By default rlang::hash() +

    S3 generic that allows backends to override how a hash is computed. By default rlang::hash() is used.

    -
    SBC_backend_hash_for_cache(backend)
    - +
    +
    SBC_backend_hash_for_cache(backend)
    +
    +
    -
    - +
    - - + + diff --git a/docs/reference/SBC_backend_iid_draws.html b/docs/reference/SBC_backend_iid_draws.html new file mode 100644 index 0000000..652d5a5 --- /dev/null +++ b/docs/reference/SBC_backend_iid_draws.html @@ -0,0 +1,160 @@ + +S3 generic to let backends signal that they produced independent draws. — SBC_backend_iid_draws • SBC + + +
    +
    + + + +
    +
    + + +
    +

Most backends (e.g. those based on variants of MCMC) don't produce +independent draws and thus diagnostics like Rhat and ESS are important +and draws may need thinning. Backends that already produce independent +draws (e.g. ADVI/optimizing) can implement this method to return TRUE +to signal this is the case. If this method returns TRUE, ESS and Rhat will +always attain their best possible values and SBC_backend_default_thin_ranks() +will return 1. +The default implementation returns FALSE.
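As an illustration, a backend that already produces exactly independent draws could opt in with a one-line method (a sketch; the class name SBC_backend_my_iid is hypothetical):

# Hypothetical backend class whose fits consist of exact independent draws
SBC_backend_iid_draws.SBC_backend_my_iid <- function(backend) {
  TRUE
}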

    +
    + +
    +
    SBC_backend_iid_draws(backend)
    +
    +# S3 method for default
    +SBC_backend_iid_draws(backend)
    +
    + +
    +

    Arguments

    +
    backend
    +

    to check

    +
    + +
    + +
    + + +
    + +
    +

    Site built with pkgdown 2.0.1.

    +
    + +
    + + + + + + + + diff --git a/docs/reference/SBC_backend_iid_samples.html b/docs/reference/SBC_backend_iid_samples.html deleted file mode 100644 index 75fb7ad..0000000 --- a/docs/reference/SBC_backend_iid_samples.html +++ /dev/null @@ -1,221 +0,0 @@ - - - - - - - - -S3 generic to let backends signal that they produced independent samples. — SBC_backend_iid_samples • SBC - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    -
    - - - - -
    - -
    -
    - - -
    -

    Most backends (e.g. those based on variatns of MCMC) don't produce -independent samples and thus diagnostics like Rhat and ESS are important -and samples may need thinning. Backends that already produce independent -samples (e.g. ADVI/optimizing) can implement this method to return TRUE -to signal this is the case. The default implementation returns FALSE.

    -
    - -
    SBC_backend_iid_samples(backend)
    -
    -# S3 method for default
    -SBC_backend_iid_samples(backend)
    - -

    Arguments

    - - - - - - -
    backend

    to check

    - - -
    - -
    - - -
    - - -
    -

    Site built with pkgdown 1.6.1.

    -
    - -
    -
    - - - - - - - - diff --git a/docs/reference/SBC_backend_mock.html b/docs/reference/SBC_backend_mock.html index 83b01ec..8974d6d 100644 --- a/docs/reference/SBC_backend_mock.html +++ b/docs/reference/SBC_backend_mock.html @@ -1,69 +1,14 @@ - - - - - - - -A mock backend. — SBC_backend_mock • SBC - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -A mock backend. — SBC_backend_mock • SBC - - - - - - - - - - - - - + + -
    -
    - -
    - -
    +
    @@ -172,49 +111,43 @@

    A mock backend.

    provide result as the draws generated by the backend.

    -
    SBC_backend_mock(
    -  result = posterior::draws_matrix(a = rnorm(100)),
    -  output = NULL,
    -  message = NULL,
    -  warning = NULL,
    -  error = NULL
    -)
    - -

    Arguments

    - - - - - - -
    result

    a draws_matrix that will be returned regardless of the data

    +
    +
    SBC_backend_mock(
    +  result = posterior::draws_matrix(a = rnorm(100)),
    +  output = NULL,
    +  message = NULL,
    +  warning = NULL,
    +  error = NULL
    +)
    +
    +
    +

    Arguments

    +
    result
    +

    a draws_matrix that will be returned regardless of the data

    +
    +
    -
    - +
    - - + + diff --git a/docs/reference/SBC_backend_rjags.html b/docs/reference/SBC_backend_rjags.html new file mode 100644 index 0000000..0bbbde6 --- /dev/null +++ b/docs/reference/SBC_backend_rjags.html @@ -0,0 +1,164 @@ + +Create a JAGS backend using rjags — SBC_backend_rjags • SBC + + +
    +
    + + + +
    +
    + + +
    +

    Create a JAGS backend using rjags

    +
    + +
    +
    SBC_backend_rjags(
    +  file,
    +  n.iter,
    +  n.burnin,
    +  variable.names,
    +  thin = 1,
    +  na.rm = TRUE,
    +  ...
    +)
    +
    + +
    +

    Arguments

    +
    file
    +

    model file or connection to model code (passed to rjags::jags.model())

    +
    n.iter
    +

number of iterations for sampling (passed to rjags::coda.samples())

    +
    n.burnin
    +

    number of iterations used for burnin

    +
    variable.names
    +

    names of variables to monitor (passed to rjags::coda.samples())

    +
    thin
    +

    thinning (passed to rjags::coda.samples())

    +
    na.rm
    +

    whether to omit variables containing NA (passed to rjags::coda.samples())

    +
    ...
    +

    additional optional arguments passed to rjags::jags.model()

    • most notably n.chains, n.adapt and inits.

    • +
    +
    + +
    + +
    + + +
    + +
    +

    Site built with pkgdown 2.0.1.

    +
    + +
    + + + + + + + + diff --git a/docs/reference/SBC_backend_rstan_optimizing.html b/docs/reference/SBC_backend_rstan_optimizing.html new file mode 100644 index 0000000..7cfd3c2 --- /dev/null +++ b/docs/reference/SBC_backend_rstan_optimizing.html @@ -0,0 +1,151 @@ + +SBC backend using the optimizing method from rstan. — SBC_backend_rstan_optimizing • SBC + + +
    +
    + + + +
    +
    + + +
    +

    SBC backend using the optimizing method from rstan.

    +
    + +
    +
    SBC_backend_rstan_optimizing(model, ..., n_retries_hessian = 1)
    +
    + +
    +

    Arguments

    +
    model
    +

    a stanmodel object (created via rstan::stan_model)

    +
    ...
    +

other arguments passed to optimizing (number of iterations, ...). +The data argument cannot be set this way as it needs to be +controlled by the package.

    +
    n_retries_hessian
    +

the number of times the backend is allowed to retry optimization +(with a different seed) to produce a usable Hessian that can produce draws. In some cases, +the Hessian may be numerically unstable and not positive definite.

    +
    + +
    + +
    + + +
    + +
    +

    Site built with pkgdown 2.0.1.

    +
    + +
    + + + + + + + + diff --git a/docs/reference/SBC_backend_rstan_sample.html b/docs/reference/SBC_backend_rstan_sample.html index 00eab98..daa5246 100644 --- a/docs/reference/SBC_backend_rstan_sample.html +++ b/docs/reference/SBC_backend_rstan_sample.html @@ -1,67 +1,12 @@ - - - - - - - -SBC backend using the sampling method from rstan. — SBC_backend_rstan_sample • SBC - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -SBC backend using the sampling method from rstan. — SBC_backend_rstan_sample • SBC - - - - + + -
    -
    - -
    - -
    +
    @@ -168,49 +107,41 @@

    SBC backend using the sampling method from rstan.<

    SBC backend using the sampling method from rstan.

    -
    SBC_backend_rstan_sample(model, ...)
    - -

    Arguments

    - - - - - - - - - - -
    model

    a stanmodel object (created via rstan::stan_model)

    ...

    other arguments passed to sampling (number of iterations, ...). -Arguments data and cores cannot be set this way as they need to be -controlled by the package.

    +
    +
    SBC_backend_rstan_sample(model, ...)
    +
    +
    +

    Arguments

    +
    model
    +

    a stanmodel object (created via rstan::stan_model)

    +
    ...
    +

    other arguments passed to sampling (number of iterations, ...). +Arguments data and cores cannot be set this way as they need to be +controlled by the package.

    +
    +
    -
    - +
    - - + + diff --git a/docs/reference/SBC_datasets.html b/docs/reference/SBC_datasets.html index 0fae847..68776a2 100644 --- a/docs/reference/SBC_datasets.html +++ b/docs/reference/SBC_datasets.html @@ -1,68 +1,13 @@ - - - - - - - -Create new SBC_datasets object. — SBC_datasets • SBC - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Create new SBC_datasets object. — SBC_datasets • SBC - - - - + + -
    -
    - -
    - -
    +
    @@ -170,49 +109,43 @@

    Create new SBC_datasets object.

    for full control, you can also create datasets directly via this function.

    -
    SBC_datasets(parameters, generated)
    - -

    Arguments

    - - - - - - - - - - -
    parameters

    samples of "true" values of unobserved parameters. -An object of class draws_matrix (from the posterior package)

    generated

    a list of objects that can be passed as data to the backend you plan to use. -(e.g. list of values for Stan-based backends, a data frame for SBC_backend_brms)

    +
    +
    SBC_datasets(variables, generated, parameters = NULL)
    +
    +
    +

    Arguments

    +
    variables
    +

    draws of "true" values of unobserved parameters or other derived variables. +An object of class draws_matrix (from the posterior package)

    +
    generated
    +

    a list of objects that can be passed as data to the backend you plan to use. +(e.g. list of values for Stan-based backends, a data frame for SBC_backend_brms)

    +
    parameters
    +

    DEPRECATED. Use variables instead.

    +
    +
    -
    - +

    - - + + diff --git a/docs/reference/SBC_example_backend.html b/docs/reference/SBC_example_backend.html new file mode 100644 index 0000000..5467307 --- /dev/null +++ b/docs/reference/SBC_example_backend.html @@ -0,0 +1,155 @@ + +Construct a backend to be used in the examples. — SBC_example_backend • SBC + + +
    +
    + + + +
    +
    + + +
    +

    Note that this will involve compiling a Stan model and may take a while.

    +
    + +
    +
    SBC_example_backend(
    +  example = c("normal_sd", "normal_bad"),
    +  interface = c("rstan", "cmdstanr", "rjags")
    +)
    +
    + +
    +

    Arguments

    +
    example
    +

    name of the example model. normal_sd is a simple model fitting +a normal distribution parametrized as mean and standard deviation. +normal_bad is a model that tries to implement the normal_sd model, +but assumes an incorrect parametrization of the normal distribution. +For Stan-based backends, the model is written as if Stan parametrized +normal distribution with precision (while Stan uses sd), for JAGS-based +backends the model is written as if JAGS parametrized normal distribution +with sd (while JAGS uses precision).

    +
    interface
    +

    name of the interface to be used to fit the model

    +
    + +
    + +
    + + +
    + +
    +

    Site built with pkgdown 2.0.1.

    +
    + +
    + + + + + + + + diff --git a/docs/reference/SBC_example_generator.html b/docs/reference/SBC_example_generator.html new file mode 100644 index 0000000..e0b7ceb --- /dev/null +++ b/docs/reference/SBC_example_generator.html @@ -0,0 +1,149 @@ + +Construct a generator used in the examples. — SBC_example_generator • SBC + + +
    +
    + + + +
    +
    + + +
    +

    Construct a generator used in the examples.

    +
    + +
    +
    SBC_example_generator(example = c("normal"), N = 100)
    +
    + +
    +

    Arguments

    +
    example
    +

    name of example

    +
    N
    +

    size of the dataset the generator should simulate

    +
    +
    +

    Value

    +

    an object that can be passed to generate_datasets()

    +
    + +
    + +
    + + +
    + +
    +

    Site built with pkgdown 2.0.1.

    +
    + +
    + + + + + + + + diff --git a/docs/reference/SBC_example_results.html b/docs/reference/SBC_example_results.html new file mode 100644 index 0000000..f8594b9 --- /dev/null +++ b/docs/reference/SBC_example_results.html @@ -0,0 +1,167 @@ + +Combine an example backend with an example generator to provide full +results that can be used to test other functions in the package. — SBC_example_results • SBC + + +
    +
    + + + +
    +
    + + +
    +

    Except for example = "visualizations", all examples will actually +compile and fit Stan models and thus may take a while to complete.
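For instance, a small run of the mismatched example could look like this (a sketch following the usage below; compiling the Stan model may take a while):

res_example <- SBC_example_results("normal_bad", interface = "rstan", n_sims = 20)
plot_rank_hist(res_example)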

    +
    + +
    +
    SBC_example_results(
    +  example = c("normal_ok", "normal_bad", "visualizations"),
    +  interface = c("rstan", "cmdstanr", "rjags"),
    +  N = 100,
    +  n_sims = 50
    +)
    +
    + +
    +

    Arguments

    +
    example
    +
• name of the example. normal_ok is an example +where the generator matches the model +(using the normal generator and normal_sd backend), while +normal_bad is an example with a mismatch between the generator and backend +that manifests in SBC (normal_bad combines the normal generator with +normal_bad backend). visualizations creates purely artificial results +that are meant to showcase the built-in plots (the interface parameter will +be ignored).

    • +
    +
    interface
    +

    name of the interface to be used for the backend

    +
    N
    +

    number of datapoints to simulate from the generator for each simulation

    +
    n_sims
    +

    number of simulations to perform

    +
    + +
    + +
    + + +
    + +
    +

    Site built with pkgdown 2.0.1.

    +
    + +
    + + + + + + + + diff --git a/docs/reference/SBC_fit.html b/docs/reference/SBC_fit.html index b4e4fa7..43a80ea 100644 --- a/docs/reference/SBC_fit.html +++ b/docs/reference/SBC_fit.html @@ -1,70 +1,15 @@ - - - - - - - -Use backend to fit a model to data. — SBC_fit • SBC - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -S3 generic using backend to fit a model to data. — SBC_fit • SBC - - - - - - - - - - - - - + + -
    -
    - -
    - -
    +
    -

    S3 generic, needs to be implemented by all backends. +

    Needs to be implemented by all backends. All implementations have to return an object for which you can safely -call SBC_fit_to_draws_matrix() and get some draws. +call SBC_fit_to_draws_matrix() and get some draws. If that's not possible an error should be raised.

    -
    SBC_fit(backend, generated, cores)
    - +
    +
    SBC_fit(backend, generated, cores)
    +
    +
    -
    - +
    - - + + diff --git a/docs/reference/SBC_fit_to_diagnostics.html b/docs/reference/SBC_fit_to_diagnostics.html index 6f3d5b8..27b0737 100644 --- a/docs/reference/SBC_fit_to_diagnostics.html +++ b/docs/reference/SBC_fit_to_diagnostics.html @@ -1,69 +1,14 @@ - - - - - - - -S3 generic to get backend-specific diagnostics. — SBC_fit_to_diagnostics • SBC - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -S3 generic to get backend-specific diagnostics. — SBC_fit_to_diagnostics • SBC - - - - - - - - - - - - - + + -
    -
    - -
    - -
    +

    The diagnostics object has to be a data.frame but may inherit additional classes - in particular it may be useful -for the returning object to implement get_diagnostic_messages().

    +for the returning object to implement get_diagnostic_messages().
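A minimal sketch of such a method for a hypothetical my_fit class, returning the required single-row data frame:

# Sketch: record the number of warnings raised during fitting as a diagnostic
SBC_fit_to_diagnostics.my_fit <- function(fit, fit_output, fit_messages, fit_warnings) {
  data.frame(n_fit_warnings = length(fit_warnings))
}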

    -
    SBC_fit_to_diagnostics(fit, fit_output, fit_messages, fit_warnings)
    - -

    Arguments

    - - - - - - - - - - - - - - - - - - -
    fit

    The fit returned by SBC_fit

    fit_output

    a character string capturing what the backend wrote to stdout

    fit_messages

    a character vector of messages the backend raised

    fit_warnings

    a character vector of warnings the backend raised

    - -

    Value

    +
    +
    SBC_fit_to_diagnostics(fit, fit_output, fit_messages, fit_warnings)
    +
    +
    +

    Arguments

    +
    fit
    +

    The fit returned by SBC_fit

    +
    fit_output
    +

    a character string capturing what the backend wrote to stdout

    +
    fit_messages
    +

    a character vector of messages the backend raised

    +
    fit_warnings
    +

    a character vector of warnings the backend raised

    +
    +
    +

    Value

    an single row data.frame that includes diagnostics or NULL, if no diagnostics available.

    +
    +
    -
    - +
    - - + + diff --git a/docs/reference/SBC_fit_to_draws_matrix.html b/docs/reference/SBC_fit_to_draws_matrix.html new file mode 100644 index 0000000..5dac3c8 --- /dev/null +++ b/docs/reference/SBC_fit_to_draws_matrix.html @@ -0,0 +1,147 @@ + +S3 generic converting a fitted model to a draws_matrix object. — SBC_fit_to_draws_matrix • SBC + + +
    +
    + + + +
    +
    + + +
    +

Needs to be implemented for all types of objects the backend can +return from SBC_fit(). The default implementation just calls +posterior::as_draws_matrix(), so if the fit object already supports +this, it will work out of the box.
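For a fit object that does not already support posterior::as_draws_matrix(), a minimal method could look like this (a sketch; the my_fit class and its $draws matrix are hypothetical):

# Sketch: convert a hypothetical my_fit object, which stores an
# iterations x variables matrix in $draws, into a draws_matrix
SBC_fit_to_draws_matrix.my_fit <- function(fit) {
  posterior::as_draws_matrix(fit$draws)
}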

    +
    + +
    +
    SBC_fit_to_draws_matrix(fit)
    +
    +# S3 method for default
    +SBC_fit_to_draws_matrix(fit)
    +
    + + +
    + +
    + + +
    + +
    +

    Site built with pkgdown 2.0.1.

    +
    + +
    + + + + + + + + diff --git a/docs/reference/SBC_generator_brms.html b/docs/reference/SBC_generator_brms.html index 91b10ba..bd346c5 100644 --- a/docs/reference/SBC_generator_brms.html +++ b/docs/reference/SBC_generator_brms.html @@ -1,68 +1,13 @@ - - - - - - - -Create a brms generator. — SBC_generator_brms • SBC - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Create a brms generator. — SBC_generator_brms • SBC - - - - + + -
    -
    - -
    - -
    +
    @@ -170,49 +109,41 @@

    Create a brms generator.

    new datasets.

    -
    SBC_generator_brms(..., generate_lp = TRUE)
    - -

    Arguments

    - - - - - - - - - - -
    ...

    arguments passed to brms::brm

    generate_lp

    whether to compute the overall log-likelihood of the model -as an additional parameter. This can be somewhat computationally expensive, -but improves sensitivity of the SBC process.

    +
    +
    SBC_generator_brms(..., generate_lp = TRUE)
    +
    +
    +

    Arguments

    +
    ...
    +

    arguments passed to brms::brm

    +
    generate_lp
    +

    whether to compute the overall log-likelihood of the model +as an additional variable. This can be somewhat computationally expensive, +but improves sensitivity of the SBC process.

    +
    +
    -
    - +
    - - + + diff --git a/docs/reference/SBC_generator_custom.html b/docs/reference/SBC_generator_custom.html index 1082b46..5a6a6ef 100644 --- a/docs/reference/SBC_generator_custom.html +++ b/docs/reference/SBC_generator_custom.html @@ -1,68 +1,13 @@ - - - - - - - -Wrap a function the creates a complete dataset. — SBC_generator_custom • SBC - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Wrap a function the creates a complete dataset. — SBC_generator_custom • SBC - - - - + + -
    -
    - -
    - -
    +
    @@ -170,61 +109,54 @@

Wrap a function that creates a complete dataset.

arguments, but does not do anything more.

    -
    SBC_generator_custom(f, ...)
    - -

    Arguments

    - - - - - - - - - - -
    f

    function accepting at least an n_datasets argument and returning -and SBC_datasets object

    ...

    Additional arguments passed to f

    - -

    Details

    - -

    Running:

    gen <- SBC_generator_custom(f, <<some other args>>)
    -datasets <- generate_datasets(gen, n_datasets = my_n_datasets)
    -

    -

    is equivalent to just running

    datasets <- f(<<some other args>>, n_datasets = my_n_datasets)
    -

    +
    +
    SBC_generator_custom(f, ...)
    +
    + +
    +

    Arguments

    +
    f
    +

function accepting at least an n_sims argument and returning +an SBC_datasets object

    +
    ...
    +

    Additional arguments passed to f

    +
    +
    +

    Details

    +

    Running:

    gen <- SBC_generator_custom(f, <<some other args>>)
    +datasets <- generate_datasets(gen, n_sims = my_n_sims)
    +

    +

    is equivalent to just running

    datasets <- f(<<some other args>>, n_sims = my_n_sims)
    +

So whenever you control the code calling generate_datasets, it usually makes more sense to just create an SBC_datasets object directly and avoid using SBC_generator_custom and generate_datasets at all. SBC_generator_custom can however be useful when code you do not control calls generate_datasets for you and the built-in generators do not provide enough flexibility.
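A minimal sketch of the direct route (the variable names and the structure of the generated data are illustrative and must match whatever your backend expects):

library(posterior)
# Two simulations of a single variable mu, each paired with matching data
vars <- draws_matrix(mu = c(0.3, -1.2))
gen <- list(
  list(N = 5, y = rnorm(5, mean = 0.3)),
  list(N = 5, y = rnorm(5, mean = -1.2))
)
ds_manual <- SBC_datasets(variables = vars, generated = gen)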

    +
    +
    -
    - +
    - - + + diff --git a/docs/reference/SBC_generator_function.html b/docs/reference/SBC_generator_function.html index b1804bd..0beee20 100644 --- a/docs/reference/SBC_generator_function.html +++ b/docs/reference/SBC_generator_function.html @@ -1,67 +1,12 @@ - - - - - - - -Generate datasets via a function that creates a single dataset. — SBC_generator_function • SBC - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Generate datasets via a function that creates a single dataset. — SBC_generator_function • SBC - - - - + + -
    -
    - -
    - -
    +
    @@ -168,49 +107,41 @@

    Generate datasets via a function that creates a single dataset.

    Generate datasets via a function that creates a single dataset.

    -
    SBC_generator_function(f, ...)
    +
    +
    SBC_generator_function(f, ...)
    +
    -

    Arguments

    - - - - - - - - - - -
    f

    function returning a list with elements parameters +

    +

    Arguments

    +
    f
    +

    function returning a list with elements variables (prior draws, a list or anything that can be converted to draws_rvars) and -generated (observed dataset, ready to be passed to backend)

    ...

    Additional arguments passed to f

    - +generated (observed dataset, ready to be passed to backend)

    +
    ...
    +

    Additional arguments passed to f

    +
    +
    -
    - +
    - - + + diff --git a/docs/reference/SBC_print_example_model.html b/docs/reference/SBC_print_example_model.html new file mode 100644 index 0000000..6e1e3c5 --- /dev/null +++ b/docs/reference/SBC_print_example_model.html @@ -0,0 +1,146 @@ + +Print the Stan code of a model used in the examples. — SBC_print_example_model • SBC + + +
    +
    + + + +
    +
    + + +
    +

    Print the Stan code of a model used in the examples.

    +
    + +
    +
    SBC_print_example_model(
    +  example = c("normal_sd", "normal_bad"),
    +  interface = c("rstan", "cmdstanr", "rjags")
    +)
    +
    + +
    +

    Arguments

    +
    example
    +

    name of the example model.

    +
    + +
    + +
    + + +
    + +
    +

    Site built with pkgdown 2.0.1.

    +
    + +
    + + + + + + + + diff --git a/docs/reference/SBC_results.html b/docs/reference/SBC_results.html index 5328f4a..2014b52 100644 --- a/docs/reference/SBC_results.html +++ b/docs/reference/SBC_results.html @@ -1,75 +1,12 @@ - - - - - - - -SBC_results objects. — SBC_results • SBC - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Create an SBC_results object — SBC_results • SBC - - - - + + -
    -
    - -
    - -
    +
    -

    The SBC_results contains the following fields:

      -
    • $stats statistics for all parameters and fits (one row per parameter-fit combination)

    • +

      This will build and validate an SBC_results object from its constituents.

      +
    + +
    +
    SBC_results(
    +  stats,
    +  fits,
    +  backend_diagnostics,
    +  default_diagnostics,
    +  outputs,
    +  messages,
    +  warnings,
    +  errors
    +)
    +
    + +
    +

    Details

    +

    The SBC_results contains the following fields:

    • $stats statistics for all variables and fits (one row per variable-fit combination)

    • $fits the raw fits (unless keep_fits = FALSE) or NULL if the fit failed

    • $errors error messages that caused fit failures

    • $outputs, $messages, $warnings the outputs/messages/warnings written by fits

    • $default_diagnostics a data frame of default convergence/correctness diagnostics (one row per fit)

    • $backend_diagnostics a data frame of backend-specific diagnostics (one row per fit)

    • -
    - -
    - -
    SBC_results(
    -  stats,
    -  fits,
    -  backend_diagnostics,
    -  default_diagnostics,
    -  outputs,
    -  messages,
    -  warnings,
    -  errors
    -)
    - - +
    +
    -
    - +
    - - + + diff --git a/docs/reference/SBC_statistics_from_single_fit.html b/docs/reference/SBC_statistics_from_single_fit.html new file mode 100644 index 0000000..0ae51bc --- /dev/null +++ b/docs/reference/SBC_statistics_from_single_fit.html @@ -0,0 +1,170 @@ + +Recompute SBC statistics given a single fit. — SBC_statistics_from_single_fit • SBC + + +
    +
    + + + +
    +
    + + +
    +

Potentially useful for doing some advanced stuff, but should not +be used in a regular workflow. Use recompute_SBC_statistics() to update +an SBC_results object with different thin_ranks or other settings.

    +
    + +
    +
    SBC_statistics_from_single_fit(
    +  fit,
    +  variables,
    +  generated,
    +  thin_ranks,
    +  ensure_num_ranks_divisor,
    +  gen_quants,
    +  backend
    +)
    +
    + +
    +

    Arguments

    +
    thin_ranks
    +

how much thinning should be applied to posterior draws before computing +ranks for SBC. Should be large enough to avoid any noticeable autocorrelation of the +thinned draws. See details below.

    +
    ensure_num_ranks_divisor
    +

    Potentially drop some posterior samples to +ensure that this number divides the total number of SBC ranks (see Details).

    +
    backend
    +

    the model + sampling algorithm. The built-in backends can be constructed +using SBC_backend_cmdstan_sample(), SBC_backend_cmdstan_variational(), +SBC_backend_rstan_sample(), SBC_backend_rstan_optimizing() and SBC_backend_brms(). +(more to come: issue 31, 38, 39). The backend is an S3 class supporting at least the SBC_fit(), +SBC_fit_to_draws_matrix() methods.

    +
    + + +
    + +
    + + +
    + +
    +

    Site built with pkgdown 2.0.1.

    +
    + +
    + + + + + + + + diff --git a/docs/reference/bind_datasets.html b/docs/reference/bind_datasets.html index d383839..03654e8 100644 --- a/docs/reference/bind_datasets.html +++ b/docs/reference/bind_datasets.html @@ -1,67 +1,12 @@ - - - - - - - -Combine multiple datasets together. — bind_datasets • SBC - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Combine multiple datasets together. — bind_datasets • SBC - - - - + + -
    -
    - -
    - -
    +
    @@ -168,43 +107,37 @@

    Combine multiple datasets together.

    Combine multiple datasets together.

    -
    bind_datasets(...)
    - -

    Arguments

    - - - - - - -
    ...

    datasets to bind

    +
    +
    bind_datasets(...)
    +
    +
    +

    Arguments

    +
    ...
    +

    datasets to bind

    +
    +
    -
    - +
    - - + + diff --git a/docs/reference/bind_results.html b/docs/reference/bind_results.html index 69b1c1c..8c752dc 100644 --- a/docs/reference/bind_results.html +++ b/docs/reference/bind_results.html @@ -1,67 +1,12 @@ - - - - - - - -Combine multiple SBC results together. — bind_results • SBC - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Combine multiple SBC results together. — bind_results • SBC - - - - + + -
    -
    - -
    - -
    +
    -

    Primarily useful for iteratively adding more datasets to your SBC check.

    +

    Primarily useful for iteratively adding more simulations to your SBC check.

    -
    bind_results(...)
    - -

    Arguments

    - - - - - - -
    ...

    objects of type SBC_results to be combined.

    - -

    Details

    +
    +
    bind_results(...)
    +
    +
    +

    Arguments

    +
    ...
    +

    objects of type SBC_results to be combined.

    +
    +
    +

    Details

    An example usage can be found in the small_model_workflow vignette.

    +
    +
    -
    - +
    - - + + diff --git a/docs/reference/calculate_prior_sd.html b/docs/reference/calculate_prior_sd.html index 2757050..bc9c781 100644 --- a/docs/reference/calculate_prior_sd.html +++ b/docs/reference/calculate_prior_sd.html @@ -1,67 +1,12 @@ - - - - - - - -Calculate prior standard deviation of a dataset — calculate_prior_sd • SBC - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Calculate prior standard deviation of a dataset — calculate_prior_sd • SBC - - - - + + -
    -
    - -
    - -
    +
    @@ -168,38 +107,36 @@

    Calculate prior standard deviation of a dataset

    Calculate prior standard deviation of a dataset

    -
    calculate_prior_sd(datasets)
    - - -

    Value

    +
    +
    calculate_prior_sd(datasets)
    +
    +
    +

    Value

    a named vector of prior SDs

    +
    +
    -
    - +
    - - + + diff --git a/docs/reference/calculate_ranks_draws_matrix.html b/docs/reference/calculate_ranks_draws_matrix.html index b867b5c..a89f213 100644 --- a/docs/reference/calculate_ranks_draws_matrix.html +++ b/docs/reference/calculate_ranks_draws_matrix.html @@ -1,68 +1,13 @@ - - - - - - - -Calculate ranks given parameter values within a posterior distribution. — calculate_ranks_draws_matrix • SBC - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Calculate ranks given variable values within a posterior distribution. — calculate_ranks_draws_matrix • SBC - - - - + + -
    -
    - -
    - -
    +
    -

    When there are ties (e.g. for discrete parameters), the rank is currently drawn stochastically +

    When there are ties (e.g. for discrete variables), the rank is currently drawn stochastically among the ties.
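    A minimal sketch of how this is typically called (the variable names and "true" values below are made up for illustration; the function expects a named vector of simulated values and a posterior::draws_matrix of the fit):

    library(SBC)
    library(posterior)

    # hypothetical posterior draws for two variables
    dm <- posterior::draws_matrix(mu = rnorm(100, 0.2, 0.1),
                                  sigma = abs(rnorm(100, 1, 0.2)))
    # simulated "true" values that generated the data
    true_values <- c(mu = 0.3, sigma = 1.1)

    # rank of each true value within the corresponding posterior draws;
    # ties (e.g. for discrete variables) are broken at random as noted above
    calculate_ranks_draws_matrix(variables = true_values, dm = dm)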

    -
    calculate_ranks_draws_matrix(params, dm)
    - -

    Arguments

    - - - - - - - - - - -
    params

    a vector of values to check

    dm

    draws_matrix of the fit (assumed to be already thinned if that was necessary)

    +
    +
    calculate_ranks_draws_matrix(variables, dm, params = NULL)
    +
    +
    +

    Arguments

    +
    variables
    +

    a vector of values to check

    +
    dm
    +

    draws_matrix of the fit (assumed to be already thinned if that was necessary)

    +
    params
    +

    DEPRECATED. Use variables instead.

    +
    +
    -
    - +
    - - + + diff --git a/docs/reference/check_all_SBC_diagnostics.html b/docs/reference/check_all_SBC_diagnostics.html index faae1f7..27c504b 100644 --- a/docs/reference/check_all_SBC_diagnostics.html +++ b/docs/reference/check_all_SBC_diagnostics.html @@ -1,67 +1,12 @@ - - - - - - - -Check diagnostics and issue warnings when those fail. — check_all_SBC_diagnostics • SBC - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Check diagnostics and issue warnings when those fail. — check_all_SBC_diagnostics • SBC - - - - + + -
    -
    - -
    - -
    +
    @@ -168,44 +107,42 @@

    Check diagnostics and issue warnings when those fail.

    Check diagnostics and issue warnings when those fail.

    -
    check_all_SBC_diagnostics(x)
    -
    -# S3 method for default
    -check_all_SBC_diagnostics(x)
    -
    -# S3 method for SBC_results
    -check_all_SBC_diagnostics(x)
    +
    +
    check_all_SBC_diagnostics(x)
     
    +# S3 method for default
    +check_all_SBC_diagnostics(x)
     
    -    

    Value

    +# S3 method for SBC_results +check_all_SBC_diagnostics(x)
    +
    +
    +

    Value

    TRUE if all checks are OK, FALSE otherwise.

    +
    +
    -
    - +
    - - + + diff --git a/docs/reference/cjs_dist.html b/docs/reference/cjs_dist.html index 5102190..e3f23e1 100644 --- a/docs/reference/cjs_dist.html +++ b/docs/reference/cjs_dist.html @@ -1,68 +1,13 @@ - - - - - - - -Cumulative Jensen-Shannon divergence — cjs_dist • SBC - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Cumulative Jensen-Shannon divergence — cjs_dist • SBC - - - - + + -
    -
    - -
    - -
    +
    @@ -170,44 +109,35 @@

    Cumulative Jensen-Shannon divergence

    samples.

    -
    cjs_dist(
    -  x,
    -  y,
    -  x_weights = rep(1/length(x), length(x)),
    -  y_weights = rep(1/length(y), length(y)),
    -  ...
    -)
    - -

    Arguments

    - - - - - - - - - - - - - - - - - - - - - - -
    x

    numeric vector of samples from first distribution

    y

    numeric vector of samples from second distribution

    x_weights

    numeric vector of weights of first distribution

    y_weights

    numeric vector of weights of second distribution

    ...

    unused

    - -

    Value

    +
    +
    cjs_dist(
    +  x,
    +  y,
    +  x_weights = rep(1/length(x), length(x)),
    +  y_weights = rep(1/length(y), length(y)),
    +  ...
    +)
    +
    +
    +

    Arguments

    +
    x
    +

    numeric vector of draws from first distribution

    +
    y
    +

    numeric vector of draws from second distribution

    +
    x_weights
    +

    numeric vector of weights of first distribution

    +
    y_weights
    +

    numeric vector of weights of second distribution

    +
    ...
    +

    unused

    +
    +
    +

    Value

    distance value based on CJS computation.

    -

    Details

    - +
    +
    +

    Details

    The Cumulative Jensen-Shannon distance is a symmetric metric based on the cumulative Jensen-Shannon divergence. The divergence CJS(P || Q) between two cumulative distribution functions P and Q is defined as:

    @@ -215,40 +145,38 @@

    Details

    The symmetric metric is defined as:

    $$CJS_{dist}(P || Q) = \sqrt{CJS(P || Q) + CJS(Q || P)}$$

    This has an upper bound of \(\sqrt{\sum (P(x) + Q(x))}\)
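    A small usage sketch (the inputs are arbitrary illustrative draws; with the defaults shown in the usage above, both samples receive uniform weights):

    set.seed(123)
    x <- rnorm(1000)              # draws from the first distribution
    y <- rnorm(1000, mean = 0.5)  # draws from the second distribution

    cjs_dist(x, y)  # a single non-negative distance; close to 0 for matching samples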

    -

    References

    - +
    +
    +

    References

    Nguyen H-V., Vreeken J. (2015). Non-parametric Jensen-Shannon Divergence. In: Appice A., Rodrigues P., Santos Costa V., Gama J., Jorge A., Soares C. (eds) Machine Learning and Knowledge Discovery in Databases. ECML PKDD 2015. Lecture Notes in Computer Science, vol 9285. Springer, Cham. doi:10.1007/978-3-319-23525-7_11

    +
    +
    -
    - +
    - - + + diff --git a/docs/reference/combine_args.html b/docs/reference/combine_args.html index 0dfbdad..bdf29ba 100644 --- a/docs/reference/combine_args.html +++ b/docs/reference/combine_args.html @@ -1,70 +1,15 @@ - - - - - - - -Combine two named lists and overwrite elements with the same name -using the value from args2 — combine_args • SBC - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Combine two named lists and overwrite elements with the same name +using the value from args2 — combine_args • SBC - - - - + + -
    -
    - -
    - -
    +
    @@ -173,35 +112,32 @@

    Combine two named lists and overwrite elements with the same name using the value from args2

    -
    combine_args(args1, args2)
    - +
    +
    combine_args(args1, args2)
    +
    +
    -
    - +
    - - + + diff --git a/docs/reference/compute_SBC.html b/docs/reference/compute_SBC.html new file mode 100644 index 0000000..d99c806 --- /dev/null +++ b/docs/reference/compute_SBC.html @@ -0,0 +1,246 @@ + +Fit datasets and evaluate diagnostics and SBC metrics. — compute_SBC • SBC + + +
    +
    + + + +
    +
    + + +
    +

    Performs the main SBC routine given datasets and a backend.

    +
    + +
    +
    compute_SBC(
    +  datasets,
    +  backend,
    +  cores_per_fit = default_cores_per_fit(length(datasets)),
    +  keep_fits = TRUE,
    +  thin_ranks = SBC_backend_default_thin_ranks(backend),
    +  ensure_num_ranks_divisor = 2,
    +  chunk_size = default_chunk_size(length(datasets)),
    +  gen_quants = NULL,
    +  cache_mode = "none",
    +  cache_location = NULL,
    +  globals = list()
    +)
    +
    + +
    +

    Arguments

    +
    datasets
    +

    an object of class SBC_datasets

    +
    backend
    +

    the model + sampling algorithm. The built-in backends can be constructed +using SBC_backend_cmdstan_sample(), SBC_backend_cmdstan_variational(), +SBC_backend_rstan_sample(), SBC_backend_rstan_optimizing() and SBC_backend_brms(). +(more to come: issue 31, 38, 39). The backend is an S3 class supporting at least the SBC_fit() and +SBC_fit_to_draws_matrix() methods.

    +
    cores_per_fit
    +

    how many cores should the backend be allowed to use for a single fit? +Defaults to the maximum number that does not produce more parallel chains +than you have cores. See default_cores_per_fit().

    +
    keep_fits
    +

    boolean, when FALSE full fits are discarded from memory - +this reduces memory consumption and increases speed (when processing in parallel), but +prevents you from inspecting the fits and using recompute_SBC_statistics(). +We recommend setting this to TRUE in the early phases of the workflow, when you run just a few fits. +Once the model is stable and you want to run a lot of iterations, we recommend setting it +to FALSE (even for quite a simple model, 1000 fits can easily exhaust 32GB of RAM).

    +
    thin_ranks
    +

    how much thinning should be applied to posterior draws before computing +ranks for SBC. Should be large enough to avoid any noticeable autocorrelation of the +thinned draws. See details below.

    +
    ensure_num_ranks_divisor
    +

    Potentially drop some posterior samples to +ensure that this number divides the total number of SBC ranks (see Details).

    +
    chunk_size
    +

    How many simulations within the datasets shall be processed in one batch +by the same worker. Relevant only when using parallel processing. +The larger the value, the smaller the overhead of parallel processing, but +the work may be distributed less evenly across workers. We recommend setting this high +enough that a single batch takes at least several seconds, i.e. for small models, +you can often reduce computation time noticeably by increasing this value. +You can use options(SBC.min_chunk_size = value) to set a minimum chunk size globally. +See the documentation of the future.chunk.size argument of future.apply::future_lapply() for more details.

    +
    cache_mode
    +

    Type of caching of results, currently the only supported modes are +"none" (do not cache) and "results" where the whole results object is stored +and recomputed only when the hash of the backend or dataset changes.

    +
    cache_location
    +

    The filesystem location of cache. For cache_mode = "results" +this should be a name of a single file. If the file name does not end with +.rds, this extension is appended.

    +
    globals
    +

    A list of names of objects that are defined +in the global environment and need to be present for the backend to work ( +if they are not already available in the package). +It is added to the globals argument to future::future(), to make those +objects available on all workers.

    +
    +
    +

    Value

    +

    An object of class SBC_results().

    +
    +
    +

    Parallelization

    +

    Parallel processing is supported via the future package; for most uses, it is most sensible +to just call plan(multisession) once in your R session and all +cores of your computer will be used. For more details refer to the documentation +of the future package.
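    A minimal end-to-end sketch of the intended call sequence (it is an assumption here that the exported example helpers work with their default arguments; the n_sims and thin_ranks values are arbitrary):

    library(SBC)
    library(future)

    plan(multisession)  # parallelize fits across all locally available cores

    generator <- SBC_example_generator()  # assumed to have a usable default example
    backend   <- SBC_example_backend()    # assumed to have a usable default backend

    datasets <- generate_datasets(generator, n_sims = 50)
    results  <- compute_SBC(datasets, backend, thin_ranks = 10)

    plot_rank_hist(results)  # inspect the resulting rank histograms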

    +
    +
    +

    Thinning

    +

    When using backends based on MCMC, there are two possible moments when +draws may need to be thinned. They can be thinned directly within the backend, +or they may be thinned only when computing the ranks for SBC, as specified by the +thin_ranks argument. The main reason those are separate is that computing the +ranks requires no or negligible autocorrelation while some autocorrelation +may be easily tolerated for summarising the fit results or assessing convergence. +In fact, thinning too aggressively in the backend may lead to overly noisy +estimates of posterior means, quantiles and the posterior::rhat() and +posterior::ess_tail() diagnostics. So for well-adapted Hamiltonian Monte-Carlo +chains (e.g. Stan-based backends), we recommend no thinning in the backend; +even a value of thin_ranks between 6 and 10 is usually sufficient to remove +the residual autocorrelation. For a backend based on Metropolis-Hastings, +it might be sensible to thin quite aggressively already in the backend and +then have some additional thinning via thin_ranks.

    +

    Backends that don't require thinning should implement SBC_backend_iid_draws() +or SBC_backend_default_thin_ranks() to avoid thinning by default.

    +
    +
    +

    Rank divisors

    +

    Some of the visualizations and post processing steps +we use in the SBC package (e.g. plot_rank_hist(), empirical_coverage()) +work best if the total number of possible SBC ranks is a "nice" number +(lots of divisors). +However, the number of ranks is one plus the number of posterior samples +after thinning - therefore as long as the number of samples is a "nice" +number, the number of ranks usually will not be. To remedy this, you can +specify ensure_num_ranks_divisor - the method will drop at most +ensure_num_ranks_divisor - 1 samples to make the number of ranks divisible +by ensure_num_ranks_divisor. The default 2 prevents the most annoying +pathologies while discarding at most a single sample.
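    A small numeric illustration of this logic (the draw count is made up; this just reproduces the arithmetic described above, not internal package code):

    n_draws_after_thinning <- 100
    n_ranks <- n_draws_after_thinning + 1      # ranks 0..100, i.e. 101 possible values
    ensure_num_ranks_divisor <- 2

    n_dropped     <- n_ranks %% ensure_num_ranks_divisor  # 1 draw dropped
    n_ranks_final <- n_ranks - n_dropped                  # 100 possible ranks, divisible by 2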

    +
    + +
    + +
    + + +
    + +
    +

    Site built with pkgdown 2.0.1.

    +
    + +
    + + + + + + + + diff --git a/docs/reference/compute_results-deprecated.html b/docs/reference/compute_results-deprecated.html new file mode 100644 index 0000000..76553c1 --- /dev/null +++ b/docs/reference/compute_results-deprecated.html @@ -0,0 +1,139 @@ + +Compute SBC results — compute_results-deprecated • SBC + + +
    +
    + + + +
    +
    + + +
    +

    Delegates directly to compute_SBC().

    +
    + + +
    +

    See also

    + +
    + +
    + +
    + + +
    + +
    +

    Site built with pkgdown 2.0.1.

    +
    + +
    + + + + + + + + diff --git a/docs/reference/compute_results.html b/docs/reference/compute_results.html deleted file mode 100644 index 0cb5851..0000000 --- a/docs/reference/compute_results.html +++ /dev/null @@ -1,307 +0,0 @@ - - - - - - - - -Fit datasets and evaluate diagnostics and SBC metrics. — compute_results • SBC - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    -
    - - - - -
    - -
    -
    - - -
    -

    Performs the main SBC routine given datasets and a backend.

    -
    - -
    compute_results(
    -  datasets,
    -  backend,
    -  cores_per_fit = default_cores_per_fit(length(datasets)),
    -  keep_fits = TRUE,
    -  thin_ranks = SBC_backend_default_thin_ranks(backend),
    -  chunk_size = default_chunk_size(length(datasets)),
    -  gen_quants = NULL,
    -  cache_mode = "none",
    -  cache_location = NULL,
    -  globals = list()
    -)
    - -

    Arguments

    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    datasets

    an object of class SBC_datasets

    backend

    the model + sampling algorithm. The built-in backends can be constructed -using SBC_backend_cmdstan_sample(), SBC_backend_cmdstan_variational(), SBC_backend_rstan_sample() and SBC_backend_brms(). -(more to come: issue 31, 38, 39). The backend is an S3 class supporting at least the SBC_fit(), -SBC_fit_to_draws_matrix() methods.

    cores_per_fit

    how many cores should the backend be allowed to use for a single fit? -Defaults to the maximum number that does not produce more parallel chains -than you have cores. See default_cores_per_fit().

    keep_fits

    boolean, when FALSE full fits are discarded from memory - -reduces memory consumption and increases speed (when processing in parallel), but -prevents you from inspecting the fits and using recompute_statistics(). -We recommend to set to TRUE in early phases of workflow, when you run just a few fits. -Once the model is stable and you want to run a lot of iterations, we recommend setting -to FALSE (even for quite a simple model, 1000 fits can easily exhaust 32GB of RAM).

    thin_ranks

    how much thinning should be applied to posterior samples before computing -ranks for SBC. Should be large enough to avoid any noticeable autocorrelation of the -thinned samples.

    chunk_size

    How many fits of datasets shall be processed in one batch -by the same worker. Relevant only when using parallel processing. -The larger the value, the smaller overhead there will be for parallel processing, but -the work may be distributed less equally across workers. We recommend setting this high -enough that a single batch takes at least several seconds, i.e. for small models, -you can often reduce computation time noticeably by increasing this value. -You can use options(SBC.min_chunk_size = value) to set a minimum chunk size globally. -See documentation of future.chunk.size argument for future.apply::future_lapply() for more details.

    cache_mode

    Type of caching of results, currently the only supported modes are -"none" (do not cache) and "results" where the whole results object is stored -and recomputed only when the hash of the backend or dataset changes.

    cache_location

    The filesystem location of cache. For cache_mode = "results" -this should be a name of a single file. If the file name does not end with -.rds, this extension is appended.

    globals

    A list of names of objects that are defined -in the global environment and need to present for the backend to work ( -if they are not already available in package). -It is added to the globals argument to future::future(), to make those -objects available on all workers.

    - -

    Value

    - -

    An object of class SBC_results().

    -

    Paralellization

    - -

    Parallel processing is supported via the future package, for most uses, it is most sensible -to just call plan(multisession) once in your R session and all -cores your computer will be used. For more details refer to the documentation -of the future package.

    -

    Thinning

    - -

    When using backends based on MCMC, there are two possible moments when -samples may need to be thinned. They can be thinned directly within the backend -and they may be thinned only to compute the ranks for SBC as specified by the -thin_ranks argument. The main reason those are separate is that computing the -ranks requires no or negligible autocorrelation while some autocorrelation -may be easily tolerated for summarising the fit results or assessing convergence. -In fact, thinning too aggressively in the backend may lead to overly noisy -estimates of posterior means, quantiles and the posterior::rhat() and -posterior::ess_tail() diagnostics. So for well-adapted Hamiltonian Monte-Carlo -chains (e.g. Stan-based backends), we recommend no thinning in the backend and -even value of thin_ranks between 6 and 10 is usually sufficient to remove -the residual autocorrelation. For a backend based on Metropolis-Hastings, -it might be sensible to thin quite aggressively already in the backend and -then have some additional thinning via thin_ranks.

    -

    Backends that don't require thining should implement SBC_backend_iid_samples() -or SBC_backend_default_thin_ranks() to avoid thinning by default.

    - -
    - -
    - - -
    - - -
    -

    Site built with pkgdown 1.6.1.

    -
    - -
    -
    - - - - - - - - diff --git a/docs/reference/data_for_ecdf_plots.html b/docs/reference/data_for_ecdf_plots.html index bc8ad1a..23f6850 100644 --- a/docs/reference/data_for_ecdf_plots.html +++ b/docs/reference/data_for_ecdf_plots.html @@ -1,67 +1,12 @@ - - - - - - - -Maybe not export in the end? Useful for debugging — data_for_ecdf_plots • SBC - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Maybe not export in the end? Useful for debugging — data_for_ecdf_plots • SBC - - - - + + -
    -
    - -
    - -
    +
    @@ -168,35 +107,32 @@

    Maybe not export in the end? Useful for debugging

    Maybe not export in the end? Useful for debugging

    -
    data_for_ecdf_plots(x, ..., prob = 0.95, gamma = NULL, K = NULL)
    - +
    +
    data_for_ecdf_plots(x, ..., prob = 0.95, gamma = NULL, K = NULL)
    +
    +
    -
    - +
    - - + + diff --git a/docs/reference/default_chunk_size.html b/docs/reference/default_chunk_size.html index a616045..4ef615f 100644 --- a/docs/reference/default_chunk_size.html +++ b/docs/reference/default_chunk_size.html @@ -1,70 +1,15 @@ - - - - - - - -Determines the default chunk size. — default_chunk_size • SBC - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Determines the default chunk size. — default_chunk_size • SBC - - - - - - - - - - - - - + + -
    -
    - -
    - -
    +

    By default will make every worker process a single chunk. -You can set the options(SBC.min_chunk_size = value) to enforce a minimum +You can set the options(SBC.min_chunk_size = value) to enforce a minimum chunk size globally (chunk size can still be larger if you have substantially more fits to run than workers).
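    A quick sketch of how the global option interacts with this helper (the fit and worker counts are arbitrary):

    options(SBC.min_chunk_size = 10)  # never send fewer than 10 fits to a worker

    # with 500 fits and 8 workers the resulting chunk size is at least 10 fits
    default_chunk_size(n_fits = 500, n_workers = 8)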

    -
    default_chunk_size(n_fits, n_workers = future::nbrOfWorkers())
    - +
    +
    default_chunk_size(n_fits, n_workers = future::nbrOfWorkers())
    +
    +
    -
    - +
    - - + + diff --git a/docs/reference/default_cores_per_fit.html b/docs/reference/default_cores_per_fit.html index 3644788..aa2e35d 100644 --- a/docs/reference/default_cores_per_fit.html +++ b/docs/reference/default_cores_per_fit.html @@ -1,70 +1,15 @@ - - - - - - - -Determines the default cores per single fit. — default_cores_per_fit • SBC - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Determines the default cores per single fit. — default_cores_per_fit • SBC - - - - - - - - - - - - - + + -
    -
    - -
    - -
    +
    @@ -174,39 +113,36 @@

    Determines the default cores per single fit.

    running so many chains in parallel that there will be more chains than cores.

    -
    default_cores_per_fit(
    -  n_fits,
    -  total_cores = future::availableCores(),
    -  chunk_size = default_chunk_size(n_fits)
    -)
    - +
    +
    default_cores_per_fit(
    +  n_fits,
    +  total_cores = future::availableCores(),
    +  chunk_size = default_chunk_size(n_fits)
    +)
    +
    +
    -
    - +
    - - + + diff --git a/docs/reference/empirical_coverage.html b/docs/reference/empirical_coverage.html index 8a91b58..759d64c 100644 --- a/docs/reference/empirical_coverage.html +++ b/docs/reference/empirical_coverage.html @@ -1,73 +1,18 @@ - - - - - - - -Compute observed coverage of posterior credible intervals. — empirical_coverage • SBC - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Compute observed coverage of posterior credible intervals. — empirical_coverage • SBC - - - - - - - - - - - - - + + -
    -
    - -
    - -
    +
    @@ -174,84 +113,75 @@

    Compute observed coverage of posterior credible intervals.

    Uses ranks to compute coverage and surrounding uncertainty of posterior credible intervals. The uncertainty is only approximate (treating coverage for each interval as a set of independent Bernoulli trials, while in fact they are not independent), so for making claims on presence/ -absence of detectable discrepancies we strongly recommend using plot_ecdf() or plot_ecdf_diff(). +absence of detectable discrepancies we strongly recommend using plot_ecdf() or plot_ecdf_diff(). The uncertainty about the coverage can however be useful for guiding decisions on whether more SBC steps should be performed (i.e. whether we can rule out that the coverage of the given backend differs too much for our purposes from the optimal value).

    -
    empirical_coverage(stats, width, prob = 0.95, interval_type = "central")
    - -

    Arguments

    - - - - - - - - - - - - - - - - - - -
    stats

    a data.frame of rank statistics (e.g. as returned in the $stats component of SBC_results), -at minimum should have at least parameter, rank and max_rank columns)

    width

    a vector of values between 0 and 1 representing widths of credible intervals for -which we compute coverage.

    prob

    determines width of the uncertainty interval around the observed coverage

    inteval_type

    "central" to show coverage of central credible intervals -or "leftmost" to show coverage of leftmost credible intervals (i.e. the observed CDF).

    - -

    Value

    +
    +
    empirical_coverage(stats, width, prob = 0.95, interval_type = "central")
    +
    -

    A data.frame with columns parameter, width (width of the interval as given +

    +

    Arguments

    +
    stats
    +

    a data.frame of rank statistics (e.g. as returned in the $stats component of SBC_results); +at a minimum it should have variable, rank and max_rank columns

    +
    width
    +

    a vector of values between 0 and 1 representing widths of credible intervals for +which we compute coverage.

    +
    prob
    +

    determines width of the uncertainty interval around the observed coverage

    +
    interval_type
    +

    "central" to show coverage of central credible intervals +or "leftmost" to show coverage of leftmost credible intervals (i.e. the observed CDF).

    +
    +
    +

    Value

    +

    A data.frame with columns variable, width (the width of the interval as given in the width parameter), width_represented (the closest width that can be represented by the ranks in the input; any discrepancy needs to be judged against this rather than width), estimate (the observed coverage for the interval) and ci_low, ci_high (the uncertainty interval around estimate; the width of this interval is given by the prob argument).

    -

    Details

    - +
    +
    +

    Details

    Note that while coverage of central posterior intervals (with the default type = "central") is often of the biggest practical interest, perfect calibration of central intervals still leaves space for substantial problems with the model (e.g. if the posterior 25% - 50% intervals contain 50% of the true values and the posterior 50% - 75% interval never contains the true value, the central 50% interval still has the ideal 50% coverage), so investigating central -intervals should always be accompanied by checks with plot_ecdf() or plot_ecdf_diff() +intervals should always be accompanied by checks with plot_ecdf() or plot_ecdf_diff() or by using type = "leftmost", because if all leftmost credible intervals are well calibrated, then all intervals are well calibrated.
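    For instance, a sketch of checking both central and leftmost interval coverage on an existing results object (results is assumed to come from compute_SBC(); the widths are arbitrary):

    # coverage of 50%, 80% and 95% central credible intervals
    empirical_coverage(results$stats, width = c(0.5, 0.8, 0.95))

    # coverage of leftmost intervals, i.e. the observed CDF
    empirical_coverage(results$stats, width = c(0.5, 0.8, 0.95),
                       interval_type = "leftmost")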

    -

    See also

    - - +
    +
    +

    See also

    + +
    +
    -
    - +
    - - + + diff --git a/docs/reference/generate_datasets.html b/docs/reference/generate_datasets.html index ca44aa5..b92fe47 100644 --- a/docs/reference/generate_datasets.html +++ b/docs/reference/generate_datasets.html @@ -1,67 +1,12 @@ - - - - - - - -Generate datasets. — generate_datasets • SBC - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Generate datasets. — generate_datasets • SBC - - - - + + -
    -
    - -
    - -
    +
    @@ -168,48 +107,46 @@

    Generate datasets.

    Generate datasets.

    -
    generate_datasets(generator, n_datasets)
    - -

    Arguments

    - - - - - - -
    generator

    a generator object - build e.g. via SBC_generator_function or -SBC_generator_brms.

    - -

    Value

    +
    +
    generate_datasets(generator, n_sims, n_datasets = NULL)
    +
    -

    object of class SBC_datasets -TODO: seed

    +
    +

    Arguments

    +
    generator
    +

    a generator object - built e.g. via SBC_generator_function or +SBC_generator_brms.

    +
    n_sims
    +

    the number of simulated datasets to use

    +
    n_datasets
    +

    DEPRECATED, use n_sims instead.

    +
    +
    +

    Value

    +

    object of class SBC_datasets. TODO: seed
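    A sketch of generating simulations from a custom R function (the simple normal model is purely illustrative; the returned element names variables and generated, and the forwarding of N to the function, are assumptions based on the naming used elsewhere in this changeset):

    single_sim <- function(N) {
      mu <- rnorm(1, mean = 0, sd = 1)   # draw the "true" value from the prior
      y  <- rnorm(N, mean = mu, sd = 1)  # simulate data given the true value
      list(variables = list(mu = mu),
           generated = list(N = N, y = y))
    }

    generator <- SBC_generator_function(single_sim, N = 20)
    datasets  <- generate_datasets(generator, n_sims = 50)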

    +
    +
    -
    - +
    - - + + diff --git a/docs/reference/generated_quantities.html b/docs/reference/generated_quantities.html index 13b60e2..fc16f18 100644 --- a/docs/reference/generated_quantities.html +++ b/docs/reference/generated_quantities.html @@ -1,71 +1,16 @@ - - - - - - - -Create a definition of generated quantities evaluated in R. — generated_quantities • SBC - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Create a definition of generated quantities evaluated in R. — generated_quantities • SBC - - - - - - - - - - - - - + + -
    -
    - -
    - -
    +
    @@ -172,54 +111,46 @@

    Create a definition of generated quantities evaluated in R.

    When the expression contains non-library functions/objects, and parallel processing is enabled, those must be named in the .globals parameter (hopefully we'll be able to detect those -automatically in the future). Note that recompute_statistics() currently +automatically in the future). Note that recompute_SBC_statistics() currently does not use parallel processing, so .globals don't need to be set.

    -
    generated_quantities(..., .globals = list())
    - -

    Arguments

    - - - - - - - - - - -
    ...

    named expressions representing the quantitites

    .globals

    A list of names of objects that are defined -in the global environment and need to present for the gen. quants. to evaluate. -It is added to the globals argument to future::future(), to make those -objects available on all workers.

    +
    +
    generated_quantities(..., .globals = list())
    +
    +
    +

    Arguments

    +
    ...
    +

    named expressions representing the quantities

    +
    .globals
    +

    A list of names of objects that are defined +in the global environment and need to be present for the gen. quants. to evaluate. +It is added to the globals argument to future::future(), to make those +objects available on all workers.
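    A sketch of how such a definition might look and be plugged into the SBC computation (mu and y are assumed to be a model variable and a generated data element of your simulations; helper_fun is a hypothetical user-defined function included only to illustrate .globals):

    helper_fun <- function(x) log(abs(x) + 1)  # hypothetical global helper

    gq <- generated_quantities(
      mu_squared = mu^2,              # transformation of a model variable
      mean_y     = mean(y),           # summary of the generated data
      helper_mu  = helper_fun(mu),
      .globals   = list("helper_fun") # make the helper available on workers
    )

    results <- compute_SBC(datasets, backend, gen_quants = gq)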

    +
    +
    -
    - +
    - - + + diff --git a/docs/reference/get_diagnostic_messages.html b/docs/reference/get_diagnostic_messages.html index 27375bb..ae2ad7f 100644 --- a/docs/reference/get_diagnostic_messages.html +++ b/docs/reference/get_diagnostic_messages.html @@ -1,67 +1,12 @@ - - - - - - - -Get diagnostic messages for SBC_results or other objects. — get_diagnostic_messages • SBC - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Get diagnostic messages for SBC_results or other objects. — get_diagnostic_messages • SBC - - - - + + -
    -
    - -
    - -
    +
    @@ -168,38 +107,36 @@

    Get diagnostic messages for SBC_results or other objects.

    Get diagnostic messages for SBC_results or other objects.

    -
    get_diagnostic_messages(x)
    - - -

    Value

    +
    +
    get_diagnostic_messages(x)
    +
    +
    +

    Value

    An object of class SBC_diagnostic_messages, inheriting a data.frame.

    +
    +
    -
    - +
    - - + + diff --git a/docs/reference/get_diagnostics_messages.html b/docs/reference/get_diagnostics_messages.html deleted file mode 100644 index fa94d69..0000000 --- a/docs/reference/get_diagnostics_messages.html +++ /dev/null @@ -1,205 +0,0 @@ - - - - - - - - -Get diagnostic messages for SBC_results or other objects. — get_diagnostics_messages • SBC - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    -
    - - - - -
    - -
    -
    - - -
    -

    Get diagnostic messages for SBC_results or other objects.

    -
    - -
    get_diagnostics_messages(x)
    - - -

    Value

    - -

    An object of class SBC_diagnostics_messages, inheriting a data.frame.

    - -
    - -
    - - -
    - - -
    -

    Site built with pkgdown 1.6.1.

    -
    - -
    -
    - - - - - - - - diff --git a/docs/reference/guess_bins.html b/docs/reference/guess_bins.html deleted file mode 100644 index 977ef3e..0000000 --- a/docs/reference/guess_bins.html +++ /dev/null @@ -1,214 +0,0 @@ - - - - - - - - -Guess the number of bins for plot_rank_hist(). — guess_bins • SBC - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    -
    - - - - -
    - -
    -
    - - -
    -

    Guess the number of bins for plot_rank_hist().

    -
    - -
    guess_bins(max_rank, N)
    - -

    Arguments

    - - - - - - - - - - -
    max_rank

    the maximum rank observed

    N

    the number of ranks observed

    - - -
    - -
    - - -
    - - -
    -

    Site built with pkgdown 1.6.1.

    -
    - -
    -
    - - - - - - - - diff --git a/docs/reference/guess_rank_hist_bins.html b/docs/reference/guess_rank_hist_bins.html new file mode 100644 index 0000000..d89da64 --- /dev/null +++ b/docs/reference/guess_rank_hist_bins.html @@ -0,0 +1,145 @@ + +Guess the number of bins for plot_rank_hist(). — guess_rank_hist_bins • SBC + + +
    +
    + + + +
    +
    + + +
    +

    Guess the number of bins for plot_rank_hist().

    +
    + +
    +
    guess_rank_hist_bins(max_rank, N)
    +
    + +
    +

    Arguments

    +
    max_rank
    +

    the maximum rank observed

    +
    N
    +

    the number of ranks observed

    +
    + +
    + +
    + + +
    + +
    +

    Site built with pkgdown 2.0.1.

    +
    + +
    + + + + + + + + diff --git a/docs/reference/index.html b/docs/reference/index.html index baa4ecc..5c42ab1 100644 --- a/docs/reference/index.html +++ b/docs/reference/index.html @@ -1,66 +1,12 @@ - - - - - - - -Function reference • SBC - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Function reference • SBC - + + - - - -
    -
    - -
    - -
    +
    - - - - - - - - - - - +
    -

    Datasets

    -

    Generating datasets ready for use with SBC and working with them.

    + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + - - - + + + + - - - - - - - + - - - - + + - - - - - - - - - - - - + + + + + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + - - - - - - - - - - + + - - - - + + + + + + + + + - - - - - - - - - - - - - - - - - - - - - - - - - + - - - -
    +

    Datasets

    +

    Generating datasets ready for use with SBC and working with them.

    +

    bind_datasets()

    Combine multiple datasets together.

    +

    generate_datasets()

    Generate datasets.

    +

    SBC_datasets()

    Create new SBC_datasets object.

    +

    `[`(<SBC_datasets>)

    Subset an SBC_datasets object.

    +

    SBC_generator_brms()

    Create a brms generator.

    +

    SBC_generator_custom()

    Wrap a function that creates a complete dataset.

    +

    SBC_generator_function()

    Generate datasets via a function that creates a single dataset.

    +

    calculate_prior_sd()

    Calculate prior standard deviation of a dataset

    -

    Backends

    -

    Represent various inference engines you can use with SBC.

    +
    +

    Backends

    +

    Represent various inference engines you can use with SBC.

    +

    SBC_backend_brms()

    Build a backend based on the brms package.

    +

    SBC_backend_brms_from_generator()

    Build a brms backend, reusing the compiled model from a previously created SBC_generator_brms object.

    +

    SBC_backend_cmdstan_sample()

    Backend based on sampling via cmdstanr.

    +

    SBC_backend_cmdstan_variational()

    Backend based on variational approximation via cmdstanr.

    +

    SBC_backend_default_thin_ranks()

    S3 generic to get backend-specific default thinning for rank computation.

    +

    SBC_backend_hash_for_cache()

    Get hash used to identify cached results.

    -

    SBC_backend_iid_samples()

    -

    S3 generic to let backends signal that they produced independent samples.

    +
    +

    SBC_backend_iid_draws()

    +

    S3 generic to let backends signal that they produced independent draws.

    SBC_backend_mock()

    A mock backend.

    +
    +

    SBC_backend_rjags()

    +

    Create a JAGS backend using rjags

    +

    SBC_backend_rstan_optimizing()

    +

    SBC backend using the optimizing method from rstan.

    SBC_backend_rstan_sample()

    SBC backend using the sampling method from rstan.

    +

    SBC_fit()

    Use backend to fit a model to data.

    +

    S3 generic using backend to fit a model to data.

    SBC_fit_to_diagnostics()

    S3 generic to get backend-specific diagnostics.

    -

    Computation & results

    -

    Functions related to running the SBC computation and handling the results.

    +
    +

    SBC_fit_to_draws_matrix()

    +

    S3 generic converting a fitted model to a draws_matrix object.

    +

    Computation & results

    +

    Functions related to running the SBC computation and handling the results.

    -

    compute_results()

    +
    +

    compute_SBC()

    Fit datasets and evaluate diagnostics and SBC metrics.

    +
    +

    check_all_SBC_diagnostics()

    +

    Check diagnostics and issue warnings when those fail.

    +

    SBC_results()

    +

    Create an SBC_results object

    +

    `[`(<SBC_results>)

    +

    Subset the results.

    generated_quantities()

    Create a definition of generated quantities evaluated in R.

    -

    statistics_from_single_fit()

    +
    +

    SBC_statistics_from_single_fit()

    Recompute SBC statistics given a single fit.

    -

    recompute_statistics()

    +
    +

    recompute_SBC_statistics()

    Recompute SBC statistics without refitting models.

    +

    bind_results()

    Combine multiple SBC results together.

    -

    check_all_SBC_diagnostics()

    -

    Check diagnostics and issue warnings when those fail.

    -

    plot_coverage()

    -

    Plot the observed coverage and its uncertainty

    -

    SBC_results()

    -

    SBC_results objects.

    -

    `[`(<SBC_results>)

    -

    Subset the results.

    +

    calculate_ranks_draws_matrix()

    Calculate ranks given parameter values within a posterior distribution.

    +

    Calculate ranks given variable values within a posterior distribution.

    get_diagnostic_messages()

    Get diagnostic messages for SBC_results or other objects.

    +

    SBC_fit_to_diagnostics()

    S3 generic to get backend-specific diagnostics.

    +

    default_chunk_size()

    Determines the default chunk size.

    +

    default_cores_per_fit()

    Determines the default cores per single fit.

    -

    Plotting & Summarising

    -

    Plotting and summarising results

    +
    +

    Plotting & Summarising

    +

    Plotting and summarising results

    +

    data_for_ecdf_plots()

    Maybe not export in the end? Useful for debugging

    +

    plot_ecdf() plot_ecdf_diff()

    Plot the ECDF-based plots.

    +

    plot_contraction()

    Prior/posterior contraction plot.

    -

    plot_coverage()

    -

    Plot the observed coverage and its uncertainty

    +
    +

    plot_coverage() plot_coverage_diff()

    +

    Plot the observed coverage and its uncertainty.

    plot_rank_hist()

    Plot rank histogram of an SBC results.

    +

    plot_sim_estimated()

    Plot the simulated "true" values versus posterior estimates

    -

    guess_bins()

    -

    Guess the number of bins for plot_rank_hist().

    +
    +

    guess_rank_hist_bins()

    +

    Guess the number of bins for plot_rank_hist().

    empirical_coverage()

    Compute observed coverage of posterior credible intervals.

    -

    Miscellaneous

    +
    +

    Examples

    +

    Functions to let you easily test the package

    +
    +

    SBC_example_backend()

    +

    Construct a backend to be used in the examples.

    +

    SBC_example_generator()

    +

    Construct a generator used in the examples.

    +

    SBC_example_results()

    +

    Combine an example backend with an example generator to provide full +results that can be used to test other functions in the package.

    +

    SBC_print_example_model()

    +

    Print the Stan code of a model used in the examples.

    +

    Miscellaneous

    +

    wasserstein()

    wasserstein distance between binned samples

    +

    cjs_dist()

    Cumulative Jensen-Shannon divergence

    +

    combine_args()

    Combine two named lists and overwrite elements with the same name using the value from args2

    +

    max_diff()

    Max difference between binned samples with the same length

    +

    rank2unif()

    Distance between binned samples (rank for SBC) and discrete uniform

    +

    Distance between binned draws (rank for SBC) and discrete uniform

    set2set()

    Summarize relational property of overall prior and posterior samples

    - +

    Summarize relational property of overall prior and posterior draws

    +
    -
    - +
    - - + + diff --git a/docs/reference/max_diff.html b/docs/reference/max_diff.html index bb29a9d..7f474a4 100644 --- a/docs/reference/max_diff.html +++ b/docs/reference/max_diff.html @@ -1,67 +1,12 @@ - - - - - - - -Max difference between binned samples with the same length — max_diff • SBC - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Max difference between binned samples with the same length — max_diff • SBC - - - - + + -
    -
    - -
    - -
    +
    @@ -168,54 +107,45 @@

    Max difference between binned samples with the same length

    Max difference between binned samples with the same length

    -
    max_diff(x, y)
    - -

    Arguments

    - - - - - - - - - - - - - - -
    x

    numeric vector of density from first distribution

    y

    numeric vector of density from second distribution

    ...

    unused

    - -

    Value

    +
    +
    max_diff(x, y)
    +
    +
    +

    Arguments

    +
    x
    +

    numeric vector of density from first distribution

    +
    y
    +

    numeric vector of density from second distribution

    +
    ...
    +

    unused

    +
    +
    +

    Value

    distance value based on max difference

    +
    +
    -
    - +
    - - + + diff --git a/docs/reference/plot_contraction.html b/docs/reference/plot_contraction.html index 7727313..00b5031 100644 --- a/docs/reference/plot_contraction.html +++ b/docs/reference/plot_contraction.html @@ -1,69 +1,14 @@ - - - - - - - -Prior/posterior contraction plot. — plot_contraction • SBC - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Prior/posterior contraction plot. — plot_contraction • SBC - - - - - - - - - - - - - + + -
    -
    - -
    - -
    +

    The rationale for this plot and its interpretation is explained in Mike Betancourt's -Towards A Principled Bayesian Workflow.

    +Towards A Principled Bayesian Workflow.

    -
    plot_contraction(x, prior_sd, parameters = NULL, scale = "sd", alpha = 0.8)
    - -

    Arguments

    - - - - - - - - - - - - - - - - - - - - - - -
    x

    object containing results (a data.frame or SBC_results() object).

    prior_sd

    a named vector of prior standard deviations for your parameters. -Either pass in analytically obtained values or use calculate_prior_sd() to get an empirical estimate from -an SBC_datasets object.

    parameters

    parameters to show in the plot or NULL to show all -must correspond a field already computed in the results (most likely "mean" and "median").

    scale

    which scale of variability you want to see - either "sd" for standard deviation -or "var" for variance.

    alpha

    the alpha for the points

    - -

    Value

    +
    +
    plot_contraction(
    +  x,
    +  prior_sd,
    +  variables = NULL,
    +  scale = "sd",
    +  alpha = 0.8,
    +  parameters = NULL
    +)
    +
    +
    +

    Arguments

    +
    x
    +

    object containing results (a data.frame or SBC_results() object).

    +
    prior_sd
    +

    a named vector of prior standard deviations for your variables. +Either pass in analytically obtained values or use calculate_prior_sd() to get an empirical estimate from +an SBC_datasets object.

    +
    variables
    +

    variables to show in the plot or NULL to show all; +must correspond to a field already computed in the results (most likely "mean" and "median").

    +
    scale
    +

    which scale of variability you want to see - either "sd" for standard deviation +or "var" for variance.

    +
    alpha
    +

    the alpha for the points

    +
    parameters
    +

    DEPRECATED, use variables instead.

    +
    +
    +

    Value

    a ggplot2 plot object
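    A usage sketch for the contraction plot (datasets and results are assumed to be existing SBC_datasets and SBC_results objects; the variable names are illustrative):

    prior_sd <- calculate_prior_sd(datasets)        # empirical prior SDs

    plot_contraction(results, prior_sd = prior_sd)  # all variables, SD scale
    plot_contraction(results, prior_sd = prior_sd,
                     variables = c("mu", "sigma"), scale = "var")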

    +
    +
    -
    - +
    - - + + diff --git a/docs/reference/plot_coverage.html b/docs/reference/plot_coverage.html index e5141e4..999b68e 100644 --- a/docs/reference/plot_coverage.html +++ b/docs/reference/plot_coverage.html @@ -1,68 +1,18 @@ - - - - - - - -Plot the observed coverage and its uncertainty — plot_coverage • SBC - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Plot the observed coverage and its uncertainty. — plot_coverage • SBC - - - - + + -
    -
    - -
    - -
    +
    -

    Please refer to empirical_coverage() for details on computation -and limitations of this plot as well as details on the arguments.

    +

    plot_coverage will plot the observed coverage, +while plot_coverage_diff will show the difference between observed +and expected coverage. +Please refer to empirical_coverage() for details on computation +and limitations of this plot as well as details on the arguments. +See vignette("rank_visualizations") for +more details.
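    A usage sketch (results is assumed to be an SBC_results object; the variable name and probability are illustrative):

    plot_coverage(results)                         # observed coverage of central intervals
    plot_coverage_diff(results, variables = "mu")  # observed minus expected coverage
    plot_coverage(results, interval_type = "leftmost", prob = 0.9)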

    -
    plot_coverage(x, parameters = NULL, prob = 0.95, interval_type = "central")
    -
    -# S3 method for SBC_results
    -plot_coverage(x, parameters = NULL, prob = 0.95, interval_type = "central")
    -
    -# S3 method for data.frame
    -plot_coverage(x, parameters = NULL, prob = 0.95, interval_type = "central")
    - -

    Arguments

    - - - - - - - - - - - - - - -
    x

    object containing results (a data.frame or SBC_results() object).

    parameters

    parameters to show in the plot or NULL to show all

    prob

    the with of the uncertainty interval to be shown

    - -

    Value

    +
    +
    plot_coverage(
    +  x,
    +  variables = NULL,
    +  prob = 0.95,
    +  interval_type = "central",
    +  parameters = NULL
    +)
    +
    +plot_coverage_diff(
    +  x,
    +  variables = NULL,
    +  prob = 0.95,
    +  interval_type = "central",
    +  parameters = NULL
    +)
    +
    +
    +

    Arguments

    +
    x
    +

    object containing results (a data.frame or SBC_results() object).

    +
    variables
    +

    variables to show in the plot or NULL to show all

    +
    prob
    +

    the width of the uncertainty interval to be shown

    +
    parameters
    +

    DEPRECATED. Use variables instead.

    +
    +
    +

    Value

    a ggplot2 plot object

    +
    +
    +

    See also

    +

    empirical_coverage

    +
    +
    -
    - +
    - - + + diff --git a/docs/reference/plot_rank_hist.html b/docs/reference/plot_rank_hist.html index 1e4b3eb..25ceb2a 100644 --- a/docs/reference/plot_rank_hist.html +++ b/docs/reference/plot_rank_hist.html @@ -1,68 +1,17 @@ - - - - - - - -Plot rank histogram of an SBC results. — plot_rank_hist • SBC - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Plot rank histogram of an SBC results. — plot_rank_hist • SBC - - - - + + -
    -
    - -
    - -
    +
    -

    By default the support is for SBC_results objects and data frames in the same -format as the $stats element of SBC_results.

    +

    The expected uniform distribution and an approximate confidence interval +are also shown. The confidence interval cannot be taken too seriously +as it is derived assuming the bins are independent (which they are not). +The plot_ecdf() and plot_ecdf_diff() plots provide a better confidence interval +but are somewhat less interpretable. See vignette("rank_visualizations") for +more details.
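    A usage sketch (results is assumed to be an SBC_results object; the bin count is arbitrary):

    plot_rank_hist(results)  # all variables, bin count guessed automatically
    plot_rank_hist(results, variables = "mu", bins = 20, prob = 0.9)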

    -
    plot_rank_hist(x, parameters = NULL, bins = NULL, prob = 0.95, ...)
    - -

    Arguments

    - - - - - - -
    x

    Object supporting the plotting method.

    +
    +
    plot_rank_hist(
    +  x,
    +  variables = NULL,
    +  bins = NULL,
    +  prob = 0.95,
    +  ...,
    +  parameters = NULL
    +)
    +
    +
    +

    Arguments

    +
    x
    +

    Object supporting the plotting method.

    +
    variables
    +

    Names of variables to show

    +
    bins
    +

    number of bins to be used in the histogram; if left unspecified, +it is determined by guess_rank_hist_bins().

    +
    prob
    +

    The width of the approximate confidence interval shown.

    +
    +
    +

    Details

    +

    By default the support is for SBC_results objects and data frames in the same +format as the $stats element of SBC_results.

    +
    +
    -
    - +
    - - + + diff --git a/docs/reference/plot_sim_estimated.html b/docs/reference/plot_sim_estimated.html index 01991ac..ca31d21 100644 --- a/docs/reference/plot_sim_estimated.html +++ b/docs/reference/plot_sim_estimated.html @@ -1,67 +1,12 @@ - - - - - - - -Plot the simulated "true" values versus posterior estimates — plot_sim_estimated • SBC - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Plot the simulated "true" values versus posterior estimates — plot_sim_estimated • SBC - - - - + + -
    -
    - -
    - -
    +
    @@ -168,70 +107,60 @@

    Plot the simulated "true" values versus posterior estimates

    Plot the simulated "true" values versus posterior estimates

    -
    plot_sim_estimated(
    -  x,
    -  parameters = NULL,
    -  estimate = "mean",
    -  uncertainty = c("q5", "q95"),
    -  alpha = 0.8
    -)
    - -

    Arguments

    - - - - - - - - - - - - - - - - - - - - - - -
    x

    object containing results (a data.frame or SBC_results() object).

    parameters

    parameters to show in the plot or NULL to show all

    estimate

    which estimate to use for the central tendency, -must correspond a field already computed in the results (most likely "mean" and "median").

    uncertainty

    which estimates to use for uncertainty (a character vector of length 2) -must correspond a field already computed in the results. Pass NULL to avoid showing uncertainty at all.

    alpha

    the alpha for the points and uncertainty intervals

    - -

    Value

    +
    +
    plot_sim_estimated(
    +  x,
    +  variables = NULL,
    +  estimate = "mean",
    +  uncertainty = c("q5", "q95"),
    +  alpha = NULL,
    +  parameters = NULL
    +)
    +
    +
    +

    Arguments

    +
    x
    +

    object containing results (a data.frame or SBC_results() object).

    +
    variables
    +

    variables to show in the plot or NULL to show all

    +
    estimate
    +

    which estimate to use for the central tendency; +must correspond to a field already computed in the results (most likely "mean" and "median").

    +
    uncertainty
    +

    which estimates to use for uncertainty (a character vector of length 2); +must correspond to fields already computed in the results. Pass NULL to avoid showing uncertainty at all.

    +
    alpha
    +

    the alpha for the points and uncertainty intervals

    +
    parameters
    +

    DEPRECATED, use variables instead

    +
    +
    +

    Value

    a ggplot2 plot object

    +
    +
    -
    - +
    - - + + diff --git a/docs/reference/rank2unif.html b/docs/reference/rank2unif.html index 0edcb6c..d22fc3d 100644 --- a/docs/reference/rank2unif.html +++ b/docs/reference/rank2unif.html @@ -1,67 +1,12 @@ - - - - - - - -Distance between binned samples (rank for SBC) and discrete uniform — rank2unif • SBC - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Distance between binned draws (rank for SBC) and discrete uniform — rank2unif • SBC - - - - + + -
    -
    - -
    - -
    +
    -

    Distance between binned samples (rank for SBC) and discrete uniform

    +

    Distance between binned draws (rank for SBC) and discrete uniform

    -
    rank2unif(results, par, bins = 20)
    - -

    Arguments

    - - - - - - - - - - - - - - - - - - -
    par

    names of parameter to plot

    bins

    number of bins to use for summary

    ranks

    array of dimension (n_iter, n_pars) where n_iter=number of posterior draw iterations, n_pars the number of parameters of interest

    thin

    integer in which thinning was applied

    +
    +
    rank2unif(results, par, bins = 20)
    +
    +
    +

    Arguments

    +
    par
    +

    names of parameter to plot

    +
    bins
    +

    number of bins to use for summary

    +
    ranks
    +

    array of dimension (n_iter, n_pars) where n_iter=number of posterior draw iterations, n_pars the number of parameters of interest

    +
    thin
    +

    integer in which thinning was applied

    +
    +
    -
    - +
    - - + + diff --git a/docs/reference/rdunif.html b/docs/reference/rdunif.html index f7f51d9..b258a5d 100644 --- a/docs/reference/rdunif.html +++ b/docs/reference/rdunif.html @@ -1,68 +1,13 @@ - - - - - - - -Discrete uniform distribution allowing for varying lower and upper bounds. — rdunif • SBC - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Discrete uniform distribution allowing for varying lower and upper bounds. — rdunif • SBC - - - - + + -
    -
    - -
    - -
    +
    @@ -170,35 +109,32 @@

    Discrete uniform distribution allowing for varying lower and upper bounds.

    -
    rdunif(n, a, b)
    - +
    +
    rdunif(n, a, b)
    +
    +
    -
    - +
    - - + + diff --git a/docs/reference/recompute_SBC_statistics.html b/docs/reference/recompute_SBC_statistics.html new file mode 100644 index 0000000..945feaf --- /dev/null +++ b/docs/reference/recompute_SBC_statistics.html @@ -0,0 +1,169 @@ + +Recompute SBC statistics without refitting models. — recompute_SBC_statistics • SBC + + +
    +
    + + + +
    +
    + + +
    +

    Recompute SBC statistics without refitting models.

    +
    + +
    +
    recompute_SBC_statistics(
    +  old_results,
    +  datasets,
    +  backend,
    +  thin_ranks = SBC_backend_default_thin_ranks(backend),
    +  ensure_num_ranks_divisor = 2,
    +  gen_quants = NULL
    +)
    +
    + +
    +

    Arguments

    +
    datasets
    +

    an object of class SBC_datasets

    +
    backend
    +

    backend used to fit the results. Used to pull various defaults +and other settings influencing the computation of statistics.

    +
    thin_ranks
    +

    how much thinning should be applied to posterior draws before computing +ranks for SBC. Should be large enough to avoid any noticeable autocorrelation of the +thinned draws. See details below.

    +
    ensure_num_ranks_divisor
    +

    Potentially drop some posterior samples to +ensure that this number divides the total number of SBC ranks (see Details).

    +
    +
    +

    Value

    +

    An S3 object of class SBC_results with updated $stats and $default_diagnostics fields.

    +
    +
    +

    Details

    +

    Useful, for example, to recompute SBC ranks with a different choice of thin_ranks +or with added generated quantities.
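    A usage sketch (results, datasets and backend are assumed to be the objects used in the original compute_SBC() call; the thin_ranks value is arbitrary):

    results_rethinned <- recompute_SBC_statistics(
      old_results = results,
      datasets    = datasets,
      backend     = backend,
      thin_ranks  = 20        # more aggressive thinning than the original run
    )

    plot_rank_hist(results_rethinned)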

    +
    + +
    + +
    + + +
    + +
    +

    Site built with pkgdown 2.0.1.

    +
    + +
    + + + + + + + + diff --git a/docs/reference/recompute_statistics-deprecated.html b/docs/reference/recompute_statistics-deprecated.html new file mode 100644 index 0000000..5cab748 --- /dev/null +++ b/docs/reference/recompute_statistics-deprecated.html @@ -0,0 +1,139 @@ + +Recompute SBC statistics without refitting models. — recompute_statistics-deprecated • SBC + + +
    +
    + + + +
    +
    + + +
    +

    Delegates directly to recompute_SBC_statistics().

    +
    + + +
    +

    See also

    + +
    + +
    + +
    + + +
    + +
    +

    Site built with pkgdown 2.0.1.

    +
    + +
    + + + + + + + + diff --git a/docs/reference/recompute_statistics.html b/docs/reference/recompute_statistics.html deleted file mode 100644 index f573d68..0000000 --- a/docs/reference/recompute_statistics.html +++ /dev/null @@ -1,222 +0,0 @@ - - - - - - - - -Recompute SBC statistics without refitting models. — recompute_statistics • SBC - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    -
    - - - - -
    - -
    -
    - - -
    -

    Useful for example to recompute SBC ranks with a different choice of thin_ranks -or added generated quantities.

    -
    - -
    recompute_statistics(
    -  old_results,
    -  datasets,
    -  backend,
    -  thin_ranks = SBC_backend_default_thin_ranks(backend),
    -  gen_quants = NULL
    -)
    - -

    Arguments

    - - - - - - -
    backend

    backend used to fit the results. Used to pull various defaults -and other setting influencing the computation of statistics.

    - -

    Value

    - -

    An S3 object of class SBC_results with updated $stats and $default_diagnostics fields.

    - -
    - -
    - - -
    - - -
    -

    Site built with pkgdown 1.6.1.

    -
    - -
    -
diff --git a/docs/reference/set2set.html b/docs/reference/set2set.html
index 94187ea..eeaba60 100644
--- a/docs/reference/set2set.html
+++ b/docs/reference/set2set.html
    (regenerated pkgdown reference page for set2set(priors, posteriors, par, bins = 20); the page title and the priors/posteriors argument descriptions now say "draws" instead of "samples", and the page is rebuilt with the updated pkgdown template; HTML markup omitted)
diff --git a/docs/reference/statistics_from_single_fit.html b/docs/reference/statistics_from_single_fit.html
deleted file mode 100644
index 28646b4..0000000
--- a/docs/reference/statistics_from_single_fit.html
+++ /dev/null
@@ -1,216 +0,0 @@
    (removes the old pkgdown 1.6.1 reference page "Recompute SBC statistics given a single fit.", which documented statistics_from_single_fit(fit, parameters, generated, thin_ranks, gen_quants, backend), superseded by SBC_statistics_from_single_fit(); HTML markup omitted)
diff --git a/docs/reference/sub-.SBC_datasets.html b/docs/reference/sub-.SBC_datasets.html
index f00dff0..6b89bf7 100644
--- a/docs/reference/sub-.SBC_datasets.html
+++ b/docs/reference/sub-.SBC_datasets.html
    (regenerated pkgdown reference page "Subset an SBC_datasets object."; the documented usage, `[`(x, indices) as an S3 method for SBC_datasets, is unchanged and the page is only rebuilt with the updated pkgdown template; HTML markup omitted)
diff --git a/docs/reference/sub-.SBC_results.html b/docs/reference/sub-.SBC_results.html
index 2dc8d53..57f7605 100644
--- a/docs/reference/sub-.SBC_results.html
+++ b/docs/reference/sub-.SBC_results.html
    (regenerated pkgdown reference page "Subset the results."; the documented usage, `[`(x, indices) as an S3 method for SBC_results, with indices being integer indices or a binary vector of the same length as the number of fits, is unchanged and the page is only rebuilt with the updated pkgdown template; HTML markup omitted)
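The two subsetting methods summarized above keep simulations aligned by index, which is handy for re-examining a few problematic fits. A minimal sketch with hypothetical objects datasets (an SBC_datasets object) and results (a matching SBC_results object):

# keep only the first 10 simulations and their fits
datasets_small <- datasets[1:10]
results_small  <- results[1:10]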
diff --git a/docs/reference/wasserstein.html b/docs/reference/wasserstein.html
index 1bdd03e..c5c8d60 100644
--- a/docs/reference/wasserstein.html
+++ b/docs/reference/wasserstein.html
    (regenerated pkgdown reference page "wasserstein distance between binned samples"; the documented usage, wasserstein(x, y) with x and y numeric vectors of density from the first and second distribution, is unchanged and the page is only rebuilt with the updated pkgdown template; HTML markup omitted)
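wasserstein() above compares two distributions that have already been binned into density vectors over a shared grid. A small illustrative call; the densities below are made up for the example:

bins <- 20
x <- rep(1 / bins, bins)                    # flat density over 20 bins
y <- dnorm(seq(-3, 3, length.out = bins))   # roughly normal shape
y <- y / sum(y)                             # normalize so the densities sum to 1
wasserstein(x, y)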
diff --git a/docs/sitemap.xml b/docs/sitemap.xml
new file mode 100644
index 0000000..a067956
--- /dev/null
+++ b/docs/sitemap.xml
@@ -0,0 +1,225 @@
    (generated sitemap with one entry per page of the pkgdown site at https://hyunjimoon.github.io/SBC/, covering the index, LICENSE and authors pages, all articles, and all reference pages; XML markup and individual entries omitted)
diff --git a/inst/normal_bad.jags b/inst/normal_bad.jags
new file mode 100644
index 0000000..1c0f56d
--- /dev/null
+++ b/inst/normal_bad.jags
@@ -0,0 +1,7 @@
+model {
+  for(i in 1:N) {
+    y[i] ~ dnorm(mu, sigma);
+  }
+  mu ~ dnorm(0, 1);
+  sigma ~ dnorm(0, 1) T(0,);
+}
diff --git a/inst/normal_bad.stan b/inst/normal_bad.stan
new file mode 100644
index 0000000..3880668
--- /dev/null
+++ b/inst/normal_bad.stan
@@ -0,0 +1,16 @@
+data {
+  int N;
+  vector[N] y;
+}
+
+parameters {
+  real mu;
+  real<lower=0> sigma;
+}
+
+model {
+  y ~ normal(mu, 1/sigma^2);
+  mu ~ normal(0, 1);
+  sigma ~ normal(0, 1);
+}
+
diff --git a/inst/normal_sd.jags b/inst/normal_sd.jags
new file mode 100644
index 0000000..1002b19
--- /dev/null
+++ b/inst/normal_sd.jags
@@ -0,0 +1,7 @@
+model {
+  for(i in 1:N) {
+    y[i] ~ dnorm(mu, 1/sigma^2);
+  }
+  mu ~ dnorm(0, 1);
+  sigma ~ dnorm(0, 1) T(0,);
+}
diff --git a/inst/normal_sd.stan b/inst/normal_sd.stan
new file mode 100644
index 0000000..c7c8ca9
--- /dev/null
+++ b/inst/normal_sd.stan
@@ -0,0 +1,16 @@
+data {
+  int N;
+  vector[N] y;
+}
+
+parameters {
+  real mu;
+  real<lower=0> sigma;
+}
+
+model {
+  y ~ normal(mu, sigma);
+  mu ~ normal(0, 1);
+  sigma ~ normal(0, 1);
+}
+
diff --git a/man/ECDF-plots.Rd b/man/ECDF-plots.Rd
index 13ca9d7..7200c58 100644
--- a/man/ECDF-plots.Rd
+++ b/man/ECDF-plots.Rd
@@ -7,35 +7,37 @@
 \usage{
 plot_ecdf(
   x,
-  parameters = NULL,
+  variables = NULL,
   K = NULL,
   gamma = NULL,
   prob = 0.95,
   size = 1,
   alpha = 0.33,
-  ...
+  ...,
+  parameters = NULL
 )
 
 plot_ecdf_diff(
   x,
-  parameters = NULL,
+  variables = NULL,
   K = NULL,
   gamma = NULL,
   prob = 0.95,
   size = 1,
   alpha = 0.33,
-  ...
+ ..., + parameters = NULL ) } \arguments{ \item{x}{object supporting the \code{\link[=data_for_ecdf_plots]{data_for_ecdf_plots()}} method.} -\item{parameters}{optional subset of parameters to show in the plot} +\item{variables}{optional subset of variables to show in the plot} \item{K}{number of uniformly spaced evaluation points for the ECDF or ECDFs. Affects the granularity of the plot and can significantly speed up the computation of the simultaneous confidence bands. Defaults to the smaller of number of -ranks per parameter and the maximum rank.} +ranks per variable and the maximum rank.} \item{gamma}{TODO} @@ -47,8 +49,12 @@ ranks per parameter and the maximum rank.} \item{...}{additional arguments passed to \code{\link[=data_for_ecdf_plots]{data_for_ecdf_plots()}}. Most notably, if \code{x} is matrix, a \code{max_rank} parameter needs to be given.} + +\item{parameters}{DEPRECATED, use \code{variables} instead.} } \description{ +See \code{vignette("rank_visualizations")} for +more details. See the methods for \code{\link[=data_for_ecdf_plots]{data_for_ecdf_plots()}} for available data formats. } \details{ diff --git a/man/SBC-deprecated.Rd b/man/SBC-deprecated.Rd new file mode 100644 index 0000000..711aff2 --- /dev/null +++ b/man/SBC-deprecated.Rd @@ -0,0 +1,29 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/SBC-deprecated.R, R/results.R +\name{SBC-deprecated} +\alias{SBC-deprecated} +\alias{compute_results} +\alias{recompute_statistics} +\title{Deprecated functions in package \pkg{SBC}.} +\usage{ +compute_results(...) + +recompute_statistics(...) +} +\description{ +The functions listed below are deprecated and will be defunct in +the near future. When possible, alternative functions with similar +functionality are also mentioned. Help pages for deprecated functions are +available at \code{help("-deprecated")}. +} +\section{\code{compute_results}}{ + +Instead of \code{compute_results}, use \code{\link{compute_SBC}}. +} + +\section{\code{recompute_statistics}}{ + +Instead of \code{recompute_statistics}, use \code{\link{recompute_SBC_statistics}}. +} + +\keyword{internal} diff --git a/man/SBC_backend_brms.Rd b/man/SBC_backend_brms.Rd index b549886..97ea664 100644 --- a/man/SBC_backend_brms.Rd +++ b/man/SBC_backend_brms.Rd @@ -4,12 +4,15 @@ \alias{SBC_backend_brms} \title{Build a backend based on the \code{brms} package.} \usage{ -SBC_backend_brms(..., template_dataset) +SBC_backend_brms(..., template_data, template_dataset = NULL) } \arguments{ \item{...}{arguments passed to \code{brm}.} -\item{template_dataset}{a representative dataset that can be used to generate code.} +\item{template_data}{a representative value for the \code{data} argument in \code{brm} +that can be used to generate code.} + +\item{template_dataset}{DEPRECATED. Use \code{template_data}} } \description{ Build a backend based on the \code{brms} package. diff --git a/man/SBC_backend_default_thin_ranks.Rd b/man/SBC_backend_default_thin_ranks.Rd index c139f94..4597b90 100644 --- a/man/SBC_backend_default_thin_ranks.Rd +++ b/man/SBC_backend_default_thin_ranks.Rd @@ -11,5 +11,5 @@ SBC_backend_default_thin_ranks(backend) } \description{ The default implementation plays it relatively safe and returns 10, unless -\code{\link[=SBC_backend_iid_samples]{SBC_backend_iid_samples()}} returns \code{TRUE} in which case it returns 1. +\code{\link[=SBC_backend_iid_draws]{SBC_backend_iid_draws()}} returns \code{TRUE} in which case it returns 1. 
} diff --git a/man/SBC_backend_iid_samples.Rd b/man/SBC_backend_iid_draws.Rd similarity index 56% rename from man/SBC_backend_iid_samples.Rd rename to man/SBC_backend_iid_draws.Rd index 00cfa9f..1da8682 100644 --- a/man/SBC_backend_iid_samples.Rd +++ b/man/SBC_backend_iid_draws.Rd @@ -1,22 +1,22 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/backends.R -\name{SBC_backend_iid_samples} -\alias{SBC_backend_iid_samples} -\alias{SBC_backend_iid_samples.default} -\title{S3 generic to let backends signal that they produced independent samples.} +\name{SBC_backend_iid_draws} +\alias{SBC_backend_iid_draws} +\alias{SBC_backend_iid_draws.default} +\title{S3 generic to let backends signal that they produced independent draws.} \usage{ -SBC_backend_iid_samples(backend) +SBC_backend_iid_draws(backend) -\method{SBC_backend_iid_samples}{default}(backend) +\method{SBC_backend_iid_draws}{default}(backend) } \arguments{ \item{backend}{to check} } \description{ Most backends (e.g. those based on variatns of MCMC) don't produce -independent samples and thus diagnostics like Rhat and ESS are important -and samples may need thinning. Backends that already produce independent -samples (e.g. ADVI/optimizing) can implement this method to return \code{TRUE} +independent draws and thus diagnostics like Rhat and ESS are important +and draws may need thinning. Backends that already produce independent +draws (e.g. ADVI/optimizing) can implement this method to return \code{TRUE} to signal this is the case. If this method returns \code{TRUE}, ESS and Rhat will always attain their best possible values and \code{\link[=SBC_backend_default_thin_ranks]{SBC_backend_default_thin_ranks()}} will return \code{1}. diff --git a/man/SBC_backend_rjags.Rd b/man/SBC_backend_rjags.Rd new file mode 100644 index 0000000..120c9b0 --- /dev/null +++ b/man/SBC_backend_rjags.Rd @@ -0,0 +1,37 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/backend-rjags.R +\name{SBC_backend_rjags} +\alias{SBC_backend_rjags} +\title{Create a JAGS backend using \code{rjags}} +\usage{ +SBC_backend_rjags( + file, + n.iter, + n.burnin, + variable.names, + thin = 1, + na.rm = TRUE, + ... +) +} +\arguments{ +\item{file}{model file or connection to model code (passed to \code{\link[rjags:jags.model]{rjags::jags.model()}})} + +\item{n.iter}{number of iterations for sampling (passed to [rjags::coda.samples())} + +\item{n.burnin}{number of iterations used for burnin} + +\item{variable.names}{names of variables to monitor (passed to \code{\link[rjags:coda.samples]{rjags::coda.samples()}})} + +\item{thin}{thinning (passed to \code{\link[rjags:coda.samples]{rjags::coda.samples()}})} + +\item{na.rm}{whether to omit variables containing NA (passed to \code{\link[rjags:coda.samples]{rjags::coda.samples()}})} + +\item{...}{additional optional arguments passed to \code{\link[rjags:jags.model]{rjags::jags.model()}} +\itemize{ +\item most notably \code{n.chains}, \code{n.adapt} and \code{inits}. 
+}} +} +\description{ +Create a JAGS backend using \code{rjags} +} diff --git a/man/SBC_backend_rstan_optimizing.Rd b/man/SBC_backend_rstan_optimizing.Rd new file mode 100644 index 0000000..6ca81de --- /dev/null +++ b/man/SBC_backend_rstan_optimizing.Rd @@ -0,0 +1,22 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/backends.R +\name{SBC_backend_rstan_optimizing} +\alias{SBC_backend_rstan_optimizing} +\title{SBC backend using the \code{optimizing} method from \code{rstan}.} +\usage{ +SBC_backend_rstan_optimizing(model, ..., n_retries_hessian = 1) +} +\arguments{ +\item{model}{a \code{stanmodel} object (created via \code{rstan::stan_model})} + +\item{...}{other arguments passed to \code{optimizing} (number of iterations, ...). +Argument \code{data} cannot be set this way as they need to be +controlled by the package.} + +\item{n_retries_hessian}{the number of times the backend is allow to retry optimization +(with different seeed) to produce a usable Hessian that can produce draws. In some cases, +the Hessian may be numerically unstable and not be positive definite.} +} +\description{ +SBC backend using the \code{optimizing} method from \code{rstan}. +} diff --git a/man/SBC_datasets.Rd b/man/SBC_datasets.Rd index f2ec253..d81420c 100644 --- a/man/SBC_datasets.Rd +++ b/man/SBC_datasets.Rd @@ -4,14 +4,16 @@ \alias{SBC_datasets} \title{Create new \code{SBC_datasets} object.} \usage{ -SBC_datasets(parameters, generated) +SBC_datasets(variables, generated, parameters = NULL) } \arguments{ -\item{parameters}{samples of "true" values of unobserved parameters. +\item{variables}{draws of "true" values of unobserved parameters or other derived variables. An object of class \code{draws_matrix} (from the \code{posterior} package)} \item{generated}{a list of objects that can be passed as data to the backend you plan to use. (e.g. list of values for Stan-based backends, a data frame for \code{SBC_backend_brms})} + +\item{parameters}{DEPRECATED. Use variables instead.} } \description{ In most cases, you may want to use \code{generate_datasets} to build the object, but diff --git a/man/SBC_example_backend.Rd b/man/SBC_example_backend.Rd new file mode 100644 index 0000000..9958c46 --- /dev/null +++ b/man/SBC_example_backend.Rd @@ -0,0 +1,26 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/example.R +\name{SBC_example_backend} +\alias{SBC_example_backend} +\title{Construct a backend to be used in the examples.} +\usage{ +SBC_example_backend( + example = c("normal_sd", "normal_bad"), + interface = c("rstan", "cmdstanr", "rjags") +) +} +\arguments{ +\item{example}{name of the example model. \code{normal_sd} is a simple model fitting +a normal distribution parametrized as mean and standard deviation. +\code{normal_bad} is a model that \emph{tries} to implement the \code{normal_sd} model, +but assumes an incorrect parametrization of the normal distribution. +For Stan-based backends, the model is written as if Stan parametrized +normal distribution with precision (while Stan uses sd), for JAGS-based +backends the model is written as if JAGS parametrized normal distribution +with sd (while JAGS uses precision).} + +\item{interface}{name of the interface to be used to fit the model} +} +\description{ +Note that this will involve compiling a Stan model and may take a while. 
+} diff --git a/man/SBC_example_generator.Rd b/man/SBC_example_generator.Rd new file mode 100644 index 0000000..ffadb26 --- /dev/null +++ b/man/SBC_example_generator.Rd @@ -0,0 +1,19 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/example.R +\name{SBC_example_generator} +\alias{SBC_example_generator} +\title{Construct a generator used in the examples.} +\usage{ +SBC_example_generator(example = c("normal"), N = 100) +} +\arguments{ +\item{example}{name of example} + +\item{N}{size of the dataset the generator should simulate} +} +\value{ +an object that can be passed to \code{\link[=generate_datasets]{generate_datasets()}} +} +\description{ +Construct a generator used in the examples. +} diff --git a/man/SBC_example_results.Rd b/man/SBC_example_results.Rd new file mode 100644 index 0000000..d18e8e4 --- /dev/null +++ b/man/SBC_example_results.Rd @@ -0,0 +1,36 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/example.R +\name{SBC_example_results} +\alias{SBC_example_results} +\title{Combine an example backend with an example generator to provide full +results that can be used to test other functions in the package.} +\usage{ +SBC_example_results( + example = c("normal_ok", "normal_bad", "visualizations"), + interface = c("rstan", "cmdstanr", "rjags"), + N = 100, + n_sims = 50 +) +} +\arguments{ +\item{example}{\itemize{ +\item name of the example. \code{normal_ok} is an example +where the generator matches the model +(using the \code{normal} generator and \code{normal_sd} backend), while +\code{normal_bad} is an example with a mismatch between the generator and backend +that manifests in SBC (\code{normal_bad} combines the \code{normal} generator with +\code{normal_bad} backend). \code{visualizations} creates a purely artificial results +that are meant to showcase the built-in plots (the \code{interface} parameter will +be ignored). +}} + +\item{interface}{name of the interface to be used for the backend} + +\item{N}{number of datapoints to simulate from the generator for each simulation} + +\item{n_sims}{number of simulations to perform} +} +\description{ +Except for \code{example = "visualizations"}, all examples will actually +compile and fit Stan models and thus may take a while to complete. +} diff --git a/man/SBC_fit.Rd b/man/SBC_fit.Rd index 7713c32..ab0678f 100644 --- a/man/SBC_fit.Rd +++ b/man/SBC_fit.Rd @@ -2,12 +2,12 @@ % Please edit documentation in R/backends.R \name{SBC_fit} \alias{SBC_fit} -\title{Use backend to fit a model to data.} +\title{S3 generic using backend to fit a model to data.} \usage{ SBC_fit(backend, generated, cores) } \description{ -S3 generic, needs to be implemented by all backends. +Needs to be implemented by all backends. All implementations have to return an object for which you can safely call \code{\link[=SBC_fit_to_draws_matrix]{SBC_fit_to_draws_matrix()}} and get some draws. If that's not possible an error should be raised. 
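The example generator and backend documented above can also be combined by hand into a complete SBC run, which is roughly what SBC_example_results() does internally. A minimal sketch (assumes a working rstan installation; the object names are illustrative):

library(SBC)

generator <- SBC_example_generator("normal", N = 100)
datasets  <- generate_datasets(generator, n_sims = 50)

backend <- SBC_example_backend("normal_sd", interface = "rstan")
results <- compute_SBC(datasets, backend)

plot_rank_hist(results)
plot_ecdf_diff(results)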
diff --git a/man/SBC_fit_to_draws_matrix.Rd b/man/SBC_fit_to_draws_matrix.Rd new file mode 100644 index 0000000..e8541ec --- /dev/null +++ b/man/SBC_fit_to_draws_matrix.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/backends.R +\name{SBC_fit_to_draws_matrix} +\alias{SBC_fit_to_draws_matrix} +\alias{SBC_fit_to_draws_matrix.default} +\title{S3 generic converting a fitted model to a \code{draws_matrix} object.} +\usage{ +SBC_fit_to_draws_matrix(fit) + +\method{SBC_fit_to_draws_matrix}{default}(fit) +} +\description{ +Needs to be implemented for all types of objects the backend can +return from \code{\link[=SBC_fit]{SBC_fit()}}. Default implementation just calls, +\code{\link[posterior:draws_matrix]{posterior::as_draws_matrix()}}, so if the fit object already supports +this, it will work out of the box. +} diff --git a/man/SBC_generator_brms.Rd b/man/SBC_generator_brms.Rd index b680667..55a5542 100644 --- a/man/SBC_generator_brms.Rd +++ b/man/SBC_generator_brms.Rd @@ -10,7 +10,7 @@ SBC_generator_brms(..., generate_lp = TRUE) \item{...}{arguments passed to \code{brms::brm}} \item{generate_lp}{whether to compute the overall log-likelihood of the model -as an additional parameter. This can be somewhat computationally expensive, +as an additional variable. This can be somewhat computationally expensive, but improves sensitivity of the SBC process.} } \description{ diff --git a/man/SBC_generator_custom.Rd b/man/SBC_generator_custom.Rd index 54272a7..b1fbb3d 100644 --- a/man/SBC_generator_custom.Rd +++ b/man/SBC_generator_custom.Rd @@ -7,7 +7,7 @@ SBC_generator_custom(f, ...) } \arguments{ -\item{f}{function accepting at least an \code{n_datasets} argument and returning +\item{f}{function accepting at least an \code{n_sims} argument and returning and \code{SBC_datasets} object} \item{...}{Additional arguments passed to \code{f}} @@ -17,11 +17,11 @@ This creates a very thin wrapper around the function and can store additional arguments, but does not do anything more.. } \details{ -Running:\if{html}{\out{
    }}\preformatted{gen <- SBC_generator_custom(f, <>) -datasets <- generate_datasets(gen, n_datasets = my_n_datasets) +Running:\if{html}{\out{
    }}\preformatted{gen <- SBC_generator_custom(f, <>) +datasets <- generate_datasets(gen, n_sims = my_n_sims) }\if{html}{\out{
    }} -is equivalent to just running\if{html}{\out{
    }}\preformatted{datasets <- f(<>, n_datasets = my_n_datasets) +is equivalent to just running\if{html}{\out{
    }}\preformatted{datasets <- f(<>, n_sims = my_n_sims) }\if{html}{\out{
    }} So whenever you control the code calling \code{generate_datasets}, diff --git a/man/SBC_generator_function.Rd b/man/SBC_generator_function.Rd index c3a8b09..bc4366e 100644 --- a/man/SBC_generator_function.Rd +++ b/man/SBC_generator_function.Rd @@ -7,7 +7,7 @@ SBC_generator_function(f, ...) } \arguments{ -\item{f}{function returning a list with elements \code{parameters} +\item{f}{function returning a list with elements \code{variables} (prior draws, a list or anything that can be converted to \code{draws_rvars}) and \code{generated} (observed dataset, ready to be passed to backend)} diff --git a/man/SBC_print_example_model.Rd b/man/SBC_print_example_model.Rd new file mode 100644 index 0000000..29bafe0 --- /dev/null +++ b/man/SBC_print_example_model.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/example.R +\name{SBC_print_example_model} +\alias{SBC_print_example_model} +\title{Print the Stan code of a model used in the examples.} +\usage{ +SBC_print_example_model( + example = c("normal_sd", "normal_bad"), + interface = c("rstan", "cmdstanr", "rjags") +) +} +\arguments{ +\item{example}{name of the example model.} +} +\description{ +Print the Stan code of a model used in the examples. +} diff --git a/man/SBC_results.Rd b/man/SBC_results.Rd index b64ba91..0cc8ae3 100644 --- a/man/SBC_results.Rd +++ b/man/SBC_results.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/results.R \name{SBC_results} \alias{SBC_results} -\title{SBC_results objects.} +\title{Create an \code{SBC_results} object} \usage{ SBC_results( stats, @@ -16,9 +16,12 @@ SBC_results( ) } \description{ +This will build and validate an \code{SBC_results} object from its constituents. +} +\details{ The \code{SBC_results} contains the following fields: \itemize{ -\item \verb{$stats} statistics for all parameters and fits (one row per parameter-fit combination) +\item \verb{$stats} statistics for all variables and fits (one row per variable-fit combination) \item \verb{$fits} the raw fits (unless \code{keep_fits = FALSE}) or \code{NULL} if the fit failed \item \verb{$errors} error messages that caused fit failures \item \verb{$outputs}, \verb{$messages}, \verb{$warnings} the outputs/messages/warnings written by fits diff --git a/man/SBC_statistics_from_single_fit.Rd b/man/SBC_statistics_from_single_fit.Rd new file mode 100644 index 0000000..d750ca2 --- /dev/null +++ b/man/SBC_statistics_from_single_fit.Rd @@ -0,0 +1,38 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/results.R +\name{SBC_statistics_from_single_fit} +\alias{SBC_statistics_from_single_fit} +\title{Recompute SBC statistics given a single fit.} +\usage{ +SBC_statistics_from_single_fit( + fit, + variables, + generated, + thin_ranks, + ensure_num_ranks_divisor, + gen_quants, + backend +) +} +\arguments{ +\item{thin_ranks}{how much thinning should be applied to posterior draws before computing +ranks for SBC. Should be large enough to avoid any noticeable autocorrelation of the +thinned draws See details below.} + +\item{ensure_num_ranks_divisor}{Potentially drop some posterior samples to +ensure that this number divides the total number of SBC ranks (see Details).} + +\item{backend}{the model + sampling algorithm. 
The built-in backends can be constructed +using \code{\link[=SBC_backend_cmdstan_sample]{SBC_backend_cmdstan_sample()}}, \code{\link[=SBC_backend_cmdstan_variational]{SBC_backend_cmdstan_variational()}}, +\code{\link[=SBC_backend_rstan_sample]{SBC_backend_rstan_sample()}}, \code{\link[=SBC_backend_rstan_optimizing]{SBC_backend_rstan_optimizing()}} and \code{\link[=SBC_backend_brms]{SBC_backend_brms()}}. +(more to come: issue 31, 38, 39). The backend is an S3 class supporting at least the \code{\link[=SBC_fit]{SBC_fit()}}, +\code{\link[=SBC_fit_to_draws_matrix]{SBC_fit_to_draws_matrix()}} methods.} +} +\description{ +Potentially useful for doing some advanced stuff, but should not +be used in regular workflow. Use \code{\link[=recompute_SBC_statistics]{recompute_SBC_statistics()}} to update +an \verb{[SBC_results]} objects with different \code{thin_ranks} or other settings. +} +\seealso{ +\code{\link[=recompute_SBC_statistics]{recompute_SBC_statistics()}} +} diff --git a/man/approx_quantile_phi.Rd b/man/approx_quantile_phi.Rd new file mode 100644 index 0000000..24ace58 --- /dev/null +++ b/man/approx_quantile_phi.Rd @@ -0,0 +1,19 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/self-calib.R +\name{approx_quantile_phi} +\alias{approx_quantile_phi} +\title{Given a vector of draws, return a vector of length S of phi which best approximates the CDF.} +\usage{ +approx_quantile_phi(draws, S) +} +\arguments{ +\item{draws}{a vector of sample draws from a distribution} + +\item{S}{number of quantile points} +} +\value{ +vector of phi which are quantile function values +} +\description{ +Given a vector of draws, return a vector of length S of phi which best approximates the CDF. +} diff --git a/man/bind_results.Rd b/man/bind_results.Rd index 381fa46..020f0c1 100644 --- a/man/bind_results.Rd +++ b/man/bind_results.Rd @@ -10,7 +10,7 @@ bind_results(...) \item{...}{objects of type \code{SBC_results} to be combined.} } \description{ -Primarily useful for iteratively adding more datasets to your SBC check. +Primarily useful for iteratively adding more simulations to your SBC check. } \details{ An example usage can be found in the \code{small_model_workflow} vignette. diff --git a/man/calculate_ranks_draws_matrix.Rd b/man/calculate_ranks_draws_matrix.Rd index d4df355..8ff39c1 100644 --- a/man/calculate_ranks_draws_matrix.Rd +++ b/man/calculate_ranks_draws_matrix.Rd @@ -2,16 +2,18 @@ % Please edit documentation in R/results.R \name{calculate_ranks_draws_matrix} \alias{calculate_ranks_draws_matrix} -\title{Calculate ranks given parameter values within a posterior distribution.} +\title{Calculate ranks given variable values within a posterior distribution.} \usage{ -calculate_ranks_draws_matrix(params, dm) +calculate_ranks_draws_matrix(variables, dm, params = NULL) } \arguments{ -\item{params}{a vector of values to check} +\item{variables}{a vector of values to check} \item{dm}{draws_matrix of the fit (assumed to be already thinned if that was necessary)} + +\item{params}{DEPRECATED. Use \code{variables} instead.} } \description{ -When there are ties (e.g. for discrete parameters), the rank is currently drawn stochastically +When there are ties (e.g. for discrete variables), the rank is currently drawn stochastically among the ties. 
} diff --git a/man/cjs_dist.Rd b/man/cjs_dist.Rd index 48a0a0e..7530238 100644 --- a/man/cjs_dist.Rd +++ b/man/cjs_dist.Rd @@ -4,18 +4,12 @@ \alias{cjs_dist} \title{Cumulative Jensen-Shannon divergence} \usage{ -cjs_dist( - x, - y, - x_weights = rep(1/length(x), length(x)), - y_weights = rep(1/length(y), length(y)), - ... -) +cjs_dist(x, y, x_weights, y_weights, ...) } \arguments{ -\item{x}{numeric vector of samples from first distribution} +\item{x}{numeric vector of draws from first distribution} -\item{y}{numeric vector of samples from second distribution} +\item{y}{numeric vector of draws from second distribution} \item{x_weights}{numeric vector of weights of first distribution} diff --git a/man/compute_results.Rd b/man/compute_SBC.Rd similarity index 75% rename from man/compute_results.Rd rename to man/compute_SBC.Rd index cca2ab0..0129322 100644 --- a/man/compute_results.Rd +++ b/man/compute_SBC.Rd @@ -1,15 +1,16 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/results.R -\name{compute_results} -\alias{compute_results} +\name{compute_SBC} +\alias{compute_SBC} \title{Fit datasets and evaluate diagnostics and SBC metrics.} \usage{ -compute_results( +compute_SBC( datasets, backend, cores_per_fit = default_cores_per_fit(length(datasets)), keep_fits = TRUE, thin_ranks = SBC_backend_default_thin_ranks(backend), + ensure_num_ranks_divisor = 2, chunk_size = default_chunk_size(length(datasets)), gen_quants = NULL, cache_mode = "none", @@ -21,7 +22,8 @@ compute_results( \item{datasets}{an object of class \code{SBC_datasets}} \item{backend}{the model + sampling algorithm. The built-in backends can be constructed -using \code{\link[=SBC_backend_cmdstan_sample]{SBC_backend_cmdstan_sample()}}, \code{\link[=SBC_backend_cmdstan_variational]{SBC_backend_cmdstan_variational()}}, \code{\link[=SBC_backend_rstan_sample]{SBC_backend_rstan_sample()}} and \code{\link[=SBC_backend_brms]{SBC_backend_brms()}}. +using \code{\link[=SBC_backend_cmdstan_sample]{SBC_backend_cmdstan_sample()}}, \code{\link[=SBC_backend_cmdstan_variational]{SBC_backend_cmdstan_variational()}}, +\code{\link[=SBC_backend_rstan_sample]{SBC_backend_rstan_sample()}}, \code{\link[=SBC_backend_rstan_optimizing]{SBC_backend_rstan_optimizing()}} and \code{\link[=SBC_backend_brms]{SBC_backend_brms()}}. (more to come: issue 31, 38, 39). The backend is an S3 class supporting at least the \code{\link[=SBC_fit]{SBC_fit()}}, \code{\link[=SBC_fit_to_draws_matrix]{SBC_fit_to_draws_matrix()}} methods.} @@ -31,16 +33,19 @@ than you have cores. See \code{\link[=default_cores_per_fit]{default_cores_per_f \item{keep_fits}{boolean, when \code{FALSE} full fits are discarded from memory - reduces memory consumption and increases speed (when processing in parallel), but -prevents you from inspecting the fits and using \code{\link[=recompute_statistics]{recompute_statistics()}}. +prevents you from inspecting the fits and using \code{\link[=recompute_SBC_statistics]{recompute_SBC_statistics()}}. We recommend to set to \code{TRUE} in early phases of workflow, when you run just a few fits. Once the model is stable and you want to run a lot of iterations, we recommend setting to \code{FALSE} (even for quite a simple model, 1000 fits can easily exhaust 32GB of RAM).} -\item{thin_ranks}{how much thinning should be applied to posterior samples before computing +\item{thin_ranks}{how much thinning should be applied to posterior draws before computing ranks for SBC. 
Should be large enough to avoid any noticeable autocorrelation of the -thinned samples.} +thinned draws See details below.} -\item{chunk_size}{How many fits of \code{datasets} shall be processed in one batch +\item{ensure_num_ranks_divisor}{Potentially drop some posterior samples to +ensure that this number divides the total number of SBC ranks (see Details).} + +\item{chunk_size}{How many simulations within the \code{datasets} shall be processed in one batch by the same worker. Relevant only when using parallel processing. The larger the value, the smaller overhead there will be for parallel processing, but the work may be distributed less equally across workers. We recommend setting this high @@ -78,7 +83,7 @@ of the \code{future} package. \section{Thinning}{ When using backends based on MCMC, there are two possible moments when -samples may need to be thinned. They can be thinned directly within the backend +draws may need to be thinned. They can be thinned directly within the backend and they may be thinned only to compute the ranks for SBC as specified by the \code{thin_ranks} argument. The main reason those are separate is that computing the ranks requires no or negligible autocorrelation while some autocorrelation @@ -92,7 +97,21 @@ the residual autocorrelation. For a backend based on Metropolis-Hastings, it might be sensible to thin quite aggressively already in the backend and then have some additional thinning via \code{thin_ranks}. -Backends that don't require thining should implement \code{\link[=SBC_backend_iid_samples]{SBC_backend_iid_samples()}} +Backends that don't require thining should implement \code{\link[=SBC_backend_iid_draws]{SBC_backend_iid_draws()}} or \code{\link[=SBC_backend_default_thin_ranks]{SBC_backend_default_thin_ranks()}} to avoid thinning by default. } +\section{Rank divisors}{ +Some of the visualizations and post processing steps +we use in the SBC package (e.g. \code{\link[=plot_rank_hist]{plot_rank_hist()}}, \code{\link[=empirical_coverage]{empirical_coverage()}}) +work best if the total number of possible SBC ranks is a "nice" number +(lots of divisors). +However, the number of ranks is one plus the number of posterior samples +after thinning - therefore as long as the number of samples is a "nice" +number, the number of ranks usually will not be. To remedy this, you can +specify \code{ensure_num_ranks_divisor} - the method will drop at most +\code{ensure_num_ranks_divisor - 1} samples to make the number of ranks divisible +by \code{ensure_num_ranks_divisor}. The default 2 prevents the most annoying +pathologies while discarding at most a single sample. +} + diff --git a/man/compute_results-deprecated.Rd b/man/compute_results-deprecated.Rd new file mode 100644 index 0000000..606dcdc --- /dev/null +++ b/man/compute_results-deprecated.Rd @@ -0,0 +1,12 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/results.R +\name{compute_results-deprecated} +\alias{compute_results-deprecated} +\title{Compute SBC results} +\description{ +Delegates directly to \code{compute_SBC()}. +} +\seealso{ +\code{\link{SBC-deprecated}} +} +\keyword{internal} diff --git a/man/empirical_coverage.Rd b/man/empirical_coverage.Rd index 694fdf4..bddf4a4 100644 --- a/man/empirical_coverage.Rd +++ b/man/empirical_coverage.Rd @@ -8,7 +8,7 @@ empirical_coverage(stats, width, prob = 0.95, interval_type = "central") } \arguments{ \item{stats}{a data.frame of rank statistics (e.g. 
as returned in the \verb{$stats} component of \link{SBC_results}), -at minimum should have at least \code{parameter}, \code{rank} and \code{max_rank} columns)} +at minimum should have at least \code{variable}, \code{rank} and \code{max_rank} columns)} \item{width}{a vector of values between 0 and 1 representing widths of credible intervals for which we compute coverage.} @@ -19,7 +19,7 @@ which we compute coverage.} or \code{"leftmost"} to show coverage of leftmost credible intervals (i.e. the observed CDF).} } \value{ -A \code{data.frame} with columns \code{parameter}, \code{width} (width of the interval as given +A \code{data.frame} with columns \code{variable}, \code{width} (width of the interval as given in the \code{width} parameter), \code{width_represented} the closest width that can be represented by the ranks in the input (any discrepancy needs to be judged against this rather than \code{width}), \code{estimate} - observed coverage for the interval, \code{ci_low}, \code{ci_high} the uncertainty diff --git a/man/generate_datasets.Rd b/man/generate_datasets.Rd index 4325203..89f4fb9 100644 --- a/man/generate_datasets.Rd +++ b/man/generate_datasets.Rd @@ -4,11 +4,15 @@ \alias{generate_datasets} \title{Generate datasets.} \usage{ -generate_datasets(generator, n_datasets) +generate_datasets(generator, n_sims, n_datasets = NULL) } \arguments{ \item{generator}{a generator object - build e.g. via \code{SBC_generator_function} or \code{SBC_generator_brms}.} + +\item{n_sims}{the number of simulated datasets to use} + +\item{n_datasets}{DEPRECATED, use \code{n_sims} instead.} } \value{ object of class \code{SBC_datasets} diff --git a/man/generated_quantities.Rd b/man/generated_quantities.Rd index 46fff36..812b782 100644 --- a/man/generated_quantities.Rd +++ b/man/generated_quantities.Rd @@ -18,6 +18,6 @@ objects available on all workers.} When the expression contains non-library functions/objects, and parallel processing is enabled, those must be named in the \code{.globals} parameter (hopefully we'll be able to detect those -automatically in the future). Note that \code{\link[=recompute_statistics]{recompute_statistics()}} currently +automatically in the future). Note that \code{\link[=recompute_SBC_statistics]{recompute_SBC_statistics()}} currently does not use parallel processing, so \code{.globals} don't need to be set. 
} diff --git a/man/guess_bins.Rd b/man/guess_rank_hist_bins.Rd similarity index 80% rename from man/guess_bins.Rd rename to man/guess_rank_hist_bins.Rd index d6f649a..d51b3d1 100644 --- a/man/guess_bins.Rd +++ b/man/guess_rank_hist_bins.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/plot.R -\name{guess_bins} -\alias{guess_bins} +\name{guess_rank_hist_bins} +\alias{guess_rank_hist_bins} \title{Guess the number of bins for \code{\link[=plot_rank_hist]{plot_rank_hist()}}.} \usage{ -guess_bins(max_rank, N) +guess_rank_hist_bins(max_rank, N) } \arguments{ \item{max_rank}{the maximum rank observed} diff --git a/man/invtf_param_vec.Rd b/man/invtf_param_vec.Rd new file mode 100644 index 0000000..d27d967 --- /dev/null +++ b/man/invtf_param_vec.Rd @@ -0,0 +1,19 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/util.R +\name{invtf_param_vec} +\alias{invtf_param_vec} +\title{Inverse transform parameters from uncontrained to constrained} +\usage{ +invtf_param_vec(param, link_type) +} +\arguments{ +\item{param}{a vector} + +\item{link_type}{int indicating link type} +} +\value{ +constrained parameter vector +} +\description{ +Inverse transform parameters from uncontrained to constrained +} diff --git a/man/plot_contraction.Rd b/man/plot_contraction.Rd index eafd267..ff1e2aa 100644 --- a/man/plot_contraction.Rd +++ b/man/plot_contraction.Rd @@ -4,22 +4,31 @@ \alias{plot_contraction} \title{Prior/posterior contraction plot.} \usage{ -plot_contraction(x, prior_sd, parameters = NULL, scale = "sd", alpha = 0.8) +plot_contraction( + x, + prior_sd, + variables = NULL, + scale = "sd", + alpha = 0.8, + parameters = NULL +) } \arguments{ \item{x}{object containing results (a data.frame or \code{\link[=SBC_results]{SBC_results()}} object).} -\item{prior_sd}{a named vector of prior standard deviations for your parameters. +\item{prior_sd}{a named vector of prior standard deviations for your variables. 
Either pass in analytically obtained values or use \code{\link[=calculate_prior_sd]{calculate_prior_sd()}} to get an empirical estimate from an \code{SBC_datasets} object.} -\item{parameters}{parameters to show in the plot or \code{NULL} to show all +\item{variables}{variables to show in the plot or \code{NULL} to show all must correspond a field already computed in the results (most likely \code{"mean"} and \code{"median"}).} \item{scale}{which scale of variability you want to see - either \code{"sd"} for standard deviation or \code{"var"} for variance.} \item{alpha}{the alpha for the points} + +\item{parameters}{DEPRECATED, use \code{variables} instead.} } \value{ a ggplot2 plot object diff --git a/man/plot_coverage.Rd b/man/plot_coverage.Rd index ea9391d..942d957 100644 --- a/man/plot_coverage.Rd +++ b/man/plot_coverage.Rd @@ -2,27 +2,46 @@ % Please edit documentation in R/plot.R \name{plot_coverage} \alias{plot_coverage} -\alias{plot_coverage.SBC_results} -\alias{plot_coverage.data.frame} -\title{Plot the observed coverage and its uncertainty} +\alias{plot_coverage_diff} +\title{Plot the observed coverage and its uncertainty.} \usage{ -plot_coverage(x, parameters = NULL, prob = 0.95, interval_type = "central") +plot_coverage( + x, + variables = NULL, + prob = 0.95, + interval_type = "central", + parameters = NULL +) -\method{plot_coverage}{SBC_results}(x, parameters = NULL, prob = 0.95, interval_type = "central") - -\method{plot_coverage}{data.frame}(x, parameters = NULL, prob = 0.95, interval_type = "central") +plot_coverage_diff( + x, + variables = NULL, + prob = 0.95, + interval_type = "central", + parameters = NULL +) } \arguments{ \item{x}{object containing results (a data.frame or \code{\link[=SBC_results]{SBC_results()}} object).} -\item{parameters}{parameters to show in the plot or \code{NULL} to show all} +\item{variables}{variables to show in the plot or \code{NULL} to show all} \item{prob}{the with of the uncertainty interval to be shown} + +\item{parameters}{DEPRECATED. Use \code{variables} instead.} } \value{ a ggplot2 plot object } \description{ +\code{plot_coverage} will plot the observed coverage, +while \code{plot_coverage_diff} will show the difference between observed +and expected coverage. Please refer to \code{\link[=empirical_coverage]{empirical_coverage()}} for details on computation and limitations of this plot as well as details on the arguments. +See \code{vignette("rank_visualizations")} for +more details. +} +\seealso{ +empirical_coverage } diff --git a/man/plot_rank_hist.Rd b/man/plot_rank_hist.Rd index f940dd1..9f4ec8d 100644 --- a/man/plot_rank_hist.Rd +++ b/man/plot_rank_hist.Rd @@ -4,12 +4,34 @@ \alias{plot_rank_hist} \title{Plot rank histogram of an SBC results.} \usage{ -plot_rank_hist(x, parameters = NULL, bins = NULL, prob = 0.95, ...) +plot_rank_hist( + x, + variables = NULL, + bins = NULL, + prob = 0.95, + ..., + parameters = NULL +) } \arguments{ \item{x}{Object supporting the plotting method.} + +\item{variables}{Names of variables to show} + +\item{bins}{number of bins to be used in the histogram, if left unspecified, +it is determined by \code{\link[=guess_rank_hist_bins]{guess_rank_hist_bins()}}.} + +\item{prob}{The width of the approximate confidence interval shown.} } \description{ +The expected uniform distribution and an approximate confidence interval +is also shown. The confidence interval cannot be taken too seriously +as it is derived assuming the bins are independent (which they are not). 
+The \code{\link[=plot_ecdf]{plot_ecdf()}} and \code{\link[=plot_ecdf_diff]{plot_ecdf_diff()}} plots provide better confidence interval +but are somewhat less interpretable. See \code{vignette("rank_visualizations")} for +more details. +} +\details{ By default the support is for \code{SBC_results} objects and data frames in the same format as the \verb{$stats} element of \code{SBC_results}. } diff --git a/man/plot_sim_estimated.Rd b/man/plot_sim_estimated.Rd index 8686c79..3621afb 100644 --- a/man/plot_sim_estimated.Rd +++ b/man/plot_sim_estimated.Rd @@ -6,16 +6,17 @@ \usage{ plot_sim_estimated( x, - parameters = NULL, + variables = NULL, estimate = "mean", uncertainty = c("q5", "q95"), - alpha = NULL + alpha = NULL, + parameters = NULL ) } \arguments{ \item{x}{object containing results (a data.frame or \code{\link[=SBC_results]{SBC_results()}} object).} -\item{parameters}{parameters to show in the plot or \code{NULL} to show all} +\item{variables}{variables to show in the plot or \code{NULL} to show all} \item{estimate}{which estimate to use for the central tendency, must correspond a field already computed in the results (most likely \code{"mean"} and \code{"median"}).} @@ -24,6 +25,8 @@ must correspond a field already computed in the results (most likely \code{"mean must correspond a field already computed in the results. Pass \code{NULL} to avoid showing uncertainty at all.} \item{alpha}{the alpha for the points and uncertainty intervals} + +\item{parameters}{DEPRECATED, use \code{variables} instead} } \value{ a ggplot2 plot object diff --git a/man/quantile_huber_loss.Rd b/man/quantile_huber_loss.Rd new file mode 100644 index 0000000..4ad917f --- /dev/null +++ b/man/quantile_huber_loss.Rd @@ -0,0 +1,27 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/self-calib.R +\name{quantile_huber_loss} +\alias{quantile_huber_loss} +\title{Calculate qualtile huber loss for a given tau tau_{s_index}} +\usage{ +quantile_huber_loss(phi_prior, phi_post, s_index, k, S, n_post_samples) +} +\arguments{ +\item{phi_prior}{vector of phis for the prior(pre-transformation) distribution} + +\item{phi_post}{vector of phis for the posterior(post-transformation) distribution} + +\item{s_index}{The tau index to calculate loss} + +\item{k}{interval to calculate loss. resulting loss will be in range [-k, k]} + +\item{S}{the number of phis. 
equal to length(phi_prior) = length(phi_post)} + +\item{n_post_samples}{the number of samples to draw from posterior, to approximate the expected huber loss} +} +\value{ +a vector of length 2, where the first value is the expected huber loss and the second value the number of posterior samples less than the quantile value at phi[s_index] +} +\description{ +Calculate qualtile huber loss for a given tau tau_{s_index} +} diff --git a/man/rank2unif.Rd b/man/rank2unif.Rd index 9bb1408..e4352ea 100644 --- a/man/rank2unif.Rd +++ b/man/rank2unif.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/metric.R \name{rank2unif} \alias{rank2unif} -\title{Distance between binned samples (rank for SBC) and discrete uniform} +\title{Distance between binned draws (rank for SBC) and discrete uniform} \usage{ rank2unif(results, par, bins = 20) } @@ -16,5 +16,5 @@ rank2unif(results, par, bins = 20) \item{thin}{integer in which thinning was applied} } \description{ -Distance between binned samples (rank for SBC) and discrete uniform +Distance between binned draws (rank for SBC) and discrete uniform } diff --git a/man/recompute_statistics.Rd b/man/recompute_SBC_statistics.Rd similarity index 52% rename from man/recompute_statistics.Rd rename to man/recompute_SBC_statistics.Rd index 8ec690f..469d267 100644 --- a/man/recompute_statistics.Rd +++ b/man/recompute_SBC_statistics.Rd @@ -1,25 +1,38 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/results.R -\name{recompute_statistics} -\alias{recompute_statistics} +\name{recompute_SBC_statistics} +\alias{recompute_SBC_statistics} \title{Recompute SBC statistics without refitting models.} \usage{ -recompute_statistics( +recompute_SBC_statistics( old_results, datasets, backend, thin_ranks = SBC_backend_default_thin_ranks(backend), + ensure_num_ranks_divisor = 2, gen_quants = NULL ) } \arguments{ +\item{datasets}{an object of class \code{SBC_datasets}} + \item{backend}{backend used to fit the results. Used to pull various defaults and other setting influencing the computation of statistics.} + +\item{thin_ranks}{how much thinning should be applied to posterior draws before computing +ranks for SBC. Should be large enough to avoid any noticeable autocorrelation of the +thinned draws See details below.} + +\item{ensure_num_ranks_divisor}{Potentially drop some posterior samples to +ensure that this number divides the total number of SBC ranks (see Details).} } \value{ An S3 object of class \code{SBC_results} with updated \verb{$stats} and \verb{$default_diagnostics} fields. } \description{ +Recompute SBC statistics without refitting models. +} +\details{ Useful for example to recompute SBC ranks with a different choice of \code{thin_ranks} or added generated quantities. } diff --git a/man/recompute_statistics-deprecated.Rd b/man/recompute_statistics-deprecated.Rd new file mode 100644 index 0000000..49b29fd --- /dev/null +++ b/man/recompute_statistics-deprecated.Rd @@ -0,0 +1,12 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/results.R +\name{recompute_statistics-deprecated} +\alias{recompute_statistics-deprecated} +\title{Recompute SBC statistics without refitting models.} +\description{ +Delegates directly to \code{recompute_SBC_statistics()}. 
+} +\seealso{ +\code{\link{SBC-deprecated}} +} +\keyword{internal} diff --git a/man/rnorm_max_coupling.Rd b/man/rnorm_max_coupling.Rd new file mode 100644 index 0000000..c487fd4 --- /dev/null +++ b/man/rnorm_max_coupling.Rd @@ -0,0 +1,22 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/util.R +\name{rnorm_max_coupling} +\alias{rnorm_max_coupling} +\title{Maximal coupling of two univariate Normal distributions +from https://github.com/pierrejacob/debiasedhmc/blob/1a2eeeb041eea4e5c050e5188e7100f31e61e35b/R/gaussian_couplings.R} +\usage{ +rnorm_max_coupling(mu1, mu2, sigma1, sigma2) +} +\arguments{ +\item{mu1}{mean of first distribution} + +\item{mu2}{mean of second distribution} + +\item{sigma1}{standard deviation of first distribution} + +\item{sigma2}{standard deviation of second distribution} +} +\description{ +Sample from maximal coupling of two univariate Normal distributions, +specified through their means and standard deviations. +} diff --git a/man/sample_quantile_phi.Rd b/man/sample_quantile_phi.Rd new file mode 100644 index 0000000..0952bc9 --- /dev/null +++ b/man/sample_quantile_phi.Rd @@ -0,0 +1,19 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/self-calib.R +\name{sample_quantile_phi} +\alias{sample_quantile_phi} +\title{Given a vector of phis, which represent the quantiles of equally spaced probabilities on \link{0, 1} defined as i/S, return a function that returns N random samples from the quantile function} +\usage{ +sample_quantile_phi(N, phis) +} +\arguments{ +\item{N}{number of samples to draw, returned as a vector of length N} + +\item{phis}{vector of phis to sample from} +} +\value{ +a vector of samples drawn by inverse transform sampling +} +\description{ +Given a vector of phis, which represent the quantiles of equally spaced probabilities on \link{0, 1} defined as i/S, return a function that returns N random samples from the quantile function +} diff --git a/man/self_calib_adaptive.Rd b/man/self_calib_adaptive.Rd new file mode 100644 index 0000000..920db11 --- /dev/null +++ b/man/self_calib_adaptive.Rd @@ -0,0 +1,42 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/self-calib.R +\name{self_calib_adaptive} +\alias{self_calib_adaptive} +\title{Auto calibrate the initial prior samples using SBC iterations, with an adaptive update strategy} +\usage{ +self_calib_adaptive( + generator, + backend, + updator, + target_params, + init_lambdas, + nsims, + gamma, + tol, + fixed_args +) +} +\arguments{ +\item{generator}{function that generates datasets given each value in \code{param}} + +\item{backend}{backend object to use for running SBC} + +\item{updator}{hyperparameter update type} + +\item{nsims}{number of datasets i.e. prior draws} + +\item{gamma}{convergence speed e.g. 
step size} + +\item{tol}{tolerence for determining termination} + +\item{fixed_args}{\emph{named list} containing additional arguments to pass to generator, \emph{after mu and sigma}} + +\item{target_param}{list of strings indicating target parameter names} + +\item{init_mu}{initial lambda_mu value to use} + +\item{init_sigma}{initial lambda_sigma value to use} +} +\description{ +Auto calibrate the initial prior samples using SBC iterations, with an adaptive update strategy +} diff --git a/man/self_calib_gaussian.Rd b/man/self_calib_gaussian.Rd new file mode 100644 index 0000000..711d04b --- /dev/null +++ b/man/self_calib_gaussian.Rd @@ -0,0 +1,44 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/self-calib.R +\name{self_calib_gaussian} +\alias{self_calib_gaussian} +\title{Auto calibrate the initial prior samples using SBC iteration for gaussian approximation} +\usage{ +self_calib_gaussian( + generator, + backend, + mixture_means_init_draws_rvars, + mixture_bw_init_draws_rvars, + nsims_fn, + thin, + max_selfcalib_iters, + save_all_results, + transform_types, + fixed_generator_args +) +} +\arguments{ +\item{generator}{function that generates datasets given each value in \code{param}} + +\item{backend}{A backend object to use for running SBC} + +\item{mixture_means_init_draws_rvars}{the initial mixture mean draws_rvars} + +\item{mixture_bw_init_draws_rvars}{the initial mixture bandwidth draws_rvars} + +\item{nsims_fn}{function with input: (mixture_means_rvar, mixture_bw_rvar), output: int +int is future number of parallel datasets to generate given true and its fitted hyperparameter (mixture_means)} + +\item{thin}{Integer defining thinning parameter} + +\item{max_selfcalib_iters}{the maximum number of iterations to run calibration. 
if not given will run indefinitely} + +\item{save_all_results}{Boolean if TRUE returns a list of all SBC results, FALSE returns just the result of the last iteration.} + +\item{transform_types}{Transformtype for mixture fitting} + +\item{fixed_generator_args}{\emph{named list} containing additional arguments to pass to generator, \emph{after mixture_means_draws_rvars and mixture_bw_draws_rvars}} +} +\description{ +Auto calibrate the initial prior samples using SBC iteration for gaussian approximation +} diff --git a/man/self_calib_gmm.Rd b/man/self_calib_gmm.Rd new file mode 100644 index 0000000..649f12f --- /dev/null +++ b/man/self_calib_gmm.Rd @@ -0,0 +1,44 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/self-calib.R +\name{self_calib_gmm} +\alias{self_calib_gmm} +\title{Auto calibrate the initial prior samples using SBC iteration and gmm approximation} +\usage{ +self_calib_gmm( + generator, + backend, + mixture_means_init_draws_rvars, + mixture_bw_init_draws_rvars, + nsims_fn, + thin, + max_selfcalib_iters, + save_all_results, + transform_types, + fixed_generator_args +) +} +\arguments{ +\item{generator}{function that generates datasets given each value in \code{param}} + +\item{backend}{A backend object to use for running SBC} + +\item{mixture_means_init_draws_rvars}{the initial mixture mean draws_rvars} + +\item{mixture_bw_init_draws_rvars}{the initial mixture bandwidth draws_rvars} + +\item{nsims_fn}{function with input: (mixture_means_rvar, mixture_bw_rvar), output: int +int is future number of parallel datasets to generate given true and its fitted hyperparameter (mixture_means)} + +\item{thin}{Integer defining thinning parameter} + +\item{max_selfcalib_iters}{the maximum number of iterations to run calibration. 
if not given will run indefinitely} + +\item{save_all_results}{Boolean if TRUE returns a list of all SBC results, FALSE returns just the result of the last iteration.} + +\item{transform_types}{Transformtype for mixture fitting} + +\item{fixed_generator_args}{\emph{named list} containing additional arguments to pass to generator, \emph{after mixture_means_draws_rvars and mixture_bw_draws_rvars}} +} +\description{ +Auto calibrate the initial prior samples using SBC iteration and gmm approximation +} diff --git a/man/set2set.Rd b/man/set2set.Rd index 3225e64..6c1cbde 100644 --- a/man/set2set.Rd +++ b/man/set2set.Rd @@ -2,19 +2,19 @@ % Please edit documentation in R/metric.R \name{set2set} \alias{set2set} -\title{Summarize relational property of overall prior and posterior samples} +\title{Summarize relational property of overall prior and posterior draws} \usage{ set2set(priors, posteriors, par, bins = 20) } \arguments{ -\item{priors}{A posterior::draws_rvars of dimension(n_iterations=1, n_chains=n_sbc_iterations, n_variables=n_variables) which stores prior samples} +\item{priors}{A posterior::draws_rvars of dimension(n_iterations=1, n_chains=n_sbc_iterations, n_variables=n_variables) which stores prior draws} -\item{posteriors}{A posterior::draws_Rvars of dimension(n_iterations=n_posterior_samples, n_chains=n_sbc_iterations, n_variables=n_variables), which stores fitted posterior samples} +\item{posteriors}{A posterior::draws_Rvars of dimension(n_iterations=n_posterior_draws, n_chains=n_sbc_iterations, n_variables=n_variables), which stores fitted posterior draws} \item{par}{names of parameter to summarize} \item{bins}{number of bins for prior and post density} } \description{ -Summarize relational property of overall prior and posterior samples +Summarize relational property of overall prior and posterior draws } diff --git a/man/statistics_from_single_fit.Rd b/man/statistics_from_single_fit.Rd deleted file mode 100644 index b0700d5..0000000 --- a/man/statistics_from_single_fit.Rd +++ /dev/null @@ -1,23 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/results.R -\name{statistics_from_single_fit} -\alias{statistics_from_single_fit} -\title{Recompute SBC statistics given a single fit.} -\usage{ -statistics_from_single_fit( - fit, - parameters, - generated, - thin_ranks, - gen_quants, - backend -) -} -\description{ -Potentially useful for doing some advanced stuff, but should not -be used in regular workflow. Use \code{\link[=recompute_statistics]{recompute_statistics()}} to update -an \verb{[SBC_results]} objects with different \code{thin_ranks} or other settings. 
-}
-\seealso{
-\code{\link[=recompute_statistics]{recompute_statistics()}}
-}
diff --git a/man/tf_param.Rd b/man/tf_param.Rd
new file mode 100644
index 0000000..334e2e9
--- /dev/null
+++ b/man/tf_param.Rd
@@ -0,0 +1,17 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/util.R
+\name{tf_param}
+\alias{tf_param}
+\title{Transform parameters from constrained to unconstrained}
+\usage{
+tf_param(param)
+}
+\arguments{
+\item{param}{\code{draws_rvars} type parameter values}
+}
+\value{
+list of unconstrained parameters and transformation type
+}
+\description{
+Transform parameters from constrained to unconstrained
+}
diff --git a/man/tf_param_vec.Rd b/man/tf_param_vec.Rd
new file mode 100644
index 0000000..16d69ef
--- /dev/null
+++ b/man/tf_param_vec.Rd
@@ -0,0 +1,19 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/util.R
+\name{tf_param_vec}
+\alias{tf_param_vec}
+\title{Transform parameters from constrained to unconstrained}
+\usage{
+tf_param_vec(param, tf)
+}
+\arguments{
+\item{param}{a vector}
+
+\item{tf}{string indicating transformation type}
+}
+\value{
+list containing unconstrained parameters and transformation type
+}
+\description{
+Transform parameters from constrained to unconstrained
+}
diff --git a/man/update_quantile_approximation.Rd b/man/update_quantile_approximation.Rd
new file mode 100644
index 0000000..49bf6f9
--- /dev/null
+++ b/man/update_quantile_approximation.Rd
@@ -0,0 +1,31 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/self-calib.R
+\name{update_quantile_approximation}
+\alias{update_quantile_approximation}
+\title{Update mixture mean through quantile approximation based on analytic gradient of quantile loss}
+\usage{
+update_quantile_approximation(
+  hyperparam_rvar,
+  hyperparam_hat_rvar,
+  S,
+  n_post_samples,
+  epsilon
+)
+}
+\arguments{
+\item{hyperparam_rvar}{a posterior::rvar object of prior (pre-transformation) mixture mean values}
+
+\item{hyperparam_hat_rvar}{a posterior::rvar object of posterior (post-transformation) mixture mean values}
+
+\item{S}{Number of approximation points for the target quantile function.}
+
+\item{n_post_samples}{the number of samples to draw from the posterior, to approximate the expected quantile loss}
+
+\item{epsilon}{gradient update coefficient}
+}
+\value{
+a posterior::rvar object with the same dimension as the input rvars. 
+} +\description{ +Update mixture mean through quantile approxiation based on analytic gradient of quantile loss +} diff --git a/tests/testthat/test-datasets.R b/tests/testthat/test-datasets.R index cb66981..0091e07 100644 --- a/tests/testthat/test-datasets.R +++ b/tests/testthat/test-datasets.R @@ -8,13 +8,13 @@ test_that("Generating datasets via functions", { names(delta) <- LETTERS[1:3] y1 <- rnorm(N, mu, sigma) y2 <- rnorm(2 * N, mu + 5, sigma) - list(parameters = list(mu = mu, sigma = sigma, beta = beta, gamma = gamma, delta = delta), + list(variables = list(mu = mu, sigma = sigma, beta = beta, gamma = gamma, delta = delta), generated = list(y1 = y1, y2 = y2)) } res <- generate_datasets( SBC_generator_function(list_function, N = 10), - n_datasets = 7) + n_sims = 7) expect_true(length(res) == 7) @@ -22,21 +22,21 @@ test_that("Generating datasets via functions", { gamma_vars <- paste0("gamma[", rep(1:2, times = 6), ",", rep(rep(1:3, each = 2), times = 2), ",", rep(1:2, each = 6), "]") delta_vars <- paste0("delta[",LETTERS[1:3],"]") - expect_identical(posterior::variables(res$parameters), c("mu", "sigma", beta_vars, gamma_vars, delta_vars)) + expect_identical(posterior::variables(res$variables), c("mu", "sigma", beta_vars, gamma_vars, delta_vars)) expect_identical(names(res$generated[[1]]), c("y1", "y2")) - expect_equal(posterior::ndraws(res$parameters), 7) + expect_equal(posterior::ndraws(res$variables), 7) - direct_func <- function(n_datasets, base_indices = 1:length(res)) { - res[base_indices[rep(1:length(base_indices), length.out = n_datasets)]] + direct_func <- function(n_sims, base_indices = 1:length(res)) { + res[base_indices[rep(1:length(base_indices), length.out = n_sims)]] } - res_direct1 <- generate_datasets(SBC_generator_custom(direct_func), n_datasets = 7) + res_direct1 <- generate_datasets(SBC_generator_custom(direct_func), n_sims = 7) expect_equal(res, res_direct1, check.attributes = FALSE) res_direct2 <- generate_datasets(SBC_generator_custom(direct_func, base_indices = 1:3), - n_datasets = 5) + n_sims = 5) expect_identical(res[c(1,2,3,1,2)], res_direct2) @@ -44,32 +44,32 @@ test_that("Generating datasets via functions", { test_that("Generating datasets via functions - exceptions", { missing_gen_function <- function() { - list(parameters = list(mu = 1), + list(variables = list(mu = 1), not_generated = 1) } expect_error(generate_datasets( SBC_generator_function(missing_gen_function), - n_datasets = 1), class = "SBC_datasets_error") + n_sims = 1), class = "SBC_datasets_error") missing_par_function <- function() { - list(not_parameters = list(mu = 1), + list(not_variables = list(mu = 1), generated = 1) } expect_error(generate_datasets( SBC_generator_function(missing_par_function), - n_datasets = 1), class = "SBC_datasets_error") + n_sims = 1), class = "SBC_datasets_error") missing_names_function <- function() { - list(parameters = list(mu = 1, 5), + list(variables = list(mu = 1, 5), generated = 1) } expect_error(generate_datasets( SBC_generator_function(missing_names_function), - n_datasets = 1), class = "SBC_datasets_error") + n_sims = 1), class = "SBC_datasets_error") }) test_that("subsetting datasets", { @@ -78,17 +78,17 @@ test_that("subsetting datasets", { sigma <- abs(rnorm(1)) y1 <- rnorm(N, mu, sigma) y2 <- rnorm(2 * N, mu + 5, sigma) - list(parameters = list(mu = mu, sigma = sigma), + list(variables = list(mu = mu, sigma = sigma), generated = list(y1 = y1, y2 = y2)) } res <- generate_datasets( SBC_generator_function(list_function, N = 10), - n_datasets = 7) + n_sims 
= 7) res_subs <- res[3:5] - expect_identical(res_subs$parameters, posterior::subset_draws(res$parameters, draw = 3:5)) + expect_identical(res_subs$variables, posterior::subset_draws(res$variables, draw = 3:5)) expect_identical(res_subs$generated[[1]], res$generated[[3]]) expect_identical(res_subs$generated[[2]], res$generated[[4]]) expect_identical(res_subs$generated[[3]], res$generated[[5]]) diff --git a/tests/testthat/test-integration.R b/tests/testthat/test-integration.R index 1a9564b..d343722 100644 --- a/tests/testthat/test-integration.R +++ b/tests/testthat/test-integration.R @@ -16,7 +16,8 @@ test_that("Integration test with mock backend", { true_result <- posterior::draws_matrix(a = a_vals_true, b = b_vals_true) datasets <- SBC_datasets(true_result,generated = rep(list(NULL), N_sims)) - res <- compute_results(datasets, backend, thin_ranks = 1) + res <- compute_SBC(datasets, backend, thin_ranks = 1, + ensure_num_ranks_divisor = 1) expected_ranks <- rep(1:N_sims, each = 2) expect_equivalent(sort(res$stats$rank), expected_ranks) @@ -28,19 +29,19 @@ test_that("Integration test with mock backend", { backend2 <- backend backend2$error <- SBC:::SBC_error("SBC_test_error", "ERR") - res2_with_outputs <- SBC:::capture_all_outputs(compute_results(datasets, backend2, thin_ranks = 1)) + res2_with_outputs <- SBC:::capture_all_outputs(compute_SBC(datasets, backend2, thin_ranks = 1)) res2 <- res2_with_outputs$result expect_identical(res2$errors, rep(list(backend2$error), N_sims)) expect_identical(res2$fits, rep(list(NULL), N_sims)) - expect_equal(res2_with_outputs$warnings, "All datasets produced error when fitting") + expect_equal(res2_with_outputs$warnings, "All simulations produced error when fitting") }) test_that("Result caching", { set.seed(1521336) - N_sims <- 10 + N_sims <- 100 a_vals <- rnorm(N_sims) fit_result <- posterior::draws_matrix(a = a_vals) @@ -51,37 +52,95 @@ test_that("Result caching", { cache_file <- tempfile(fileext = ".rds") res_first <- SBC:::capture_all_outputs( - compute_results(datasets, backend, thin_ranks = 1, cache_mode = "results", cache_location = cache_file)) + compute_SBC(datasets, backend, thin_ranks = 1, cache_mode = "results", cache_location = cache_file)) expect_false(any(grepl("cache", c(res_first$output, res_first$messages, res_first$warnings)))) + file_info_cols <- c("size", "mtime", "ctime") + # Succesful load from cache + old_file_info <- file.info(cache_file) expect_message( - compute_results(datasets, backend, thin_ranks = 1, cache_mode = "results", cache_location = cache_file), + compute_SBC(datasets, backend, thin_ranks = 1, cache_mode = "results", cache_location = cache_file), "loaded from cache" ) + new_file_info <- file.info(cache_file) + expect_identical(old_file_info[, file_info_cols], new_file_info[, file_info_cols]) # Change datasets + old_file_info <- file.info(cache_file) datasets_changed <- datasets datasets_changed[[3]] <- "a" expect_message( - compute_results(datasets_changed, backend, thin_ranks = 1, cache_mode = "results", cache_location = cache_file), + compute_SBC(datasets_changed, backend, thin_ranks = 1, cache_mode = "results", cache_location = cache_file), "datasets.*differ.*recompute" ) + new_file_info <- file.info(cache_file) + expect_lt(old_file_info$mtime, new_file_info$mtime) + # Now should be succesful + old_file_info <- file.info(cache_file) expect_message( - compute_results(datasets_changed, backend, thin_ranks = 1, cache_mode = "results", cache_location = cache_file), + compute_SBC(datasets_changed, backend, thin_ranks = 1, 
cache_mode = "results", cache_location = cache_file), "loaded from cache" ) + new_file_info <- file.info(cache_file) + expect_identical(old_file_info[, file_info_cols], new_file_info[, file_info_cols]) # Change backend + old_file_info <- file.info(cache_file) backend_changed <- backend backend_changed$result[5, "a"] <- 0 expect_message( - compute_results(datasets_changed, backend_changed, thin_ranks = 1, cache_mode = "results", cache_location = cache_file), + compute_SBC(datasets_changed, backend_changed, thin_ranks = 1, cache_mode = "results", cache_location = cache_file), "backend.*differ.*recompute" ) + new_file_info <- file.info(cache_file) + expect_lt(old_file_info$mtime, new_file_info$mtime) + + # Change thin_ranks, so should call recompute_statistics and not touch the cache + old_file_info <- file.info(cache_file) + expect_message( + compute_SBC(datasets_changed, backend_changed, thin_ranks = 2, cache_mode = "results", cache_location = cache_file), + "recompute_SBC_statistics" + ) + new_file_info <- file.info(cache_file) + expect_identical(old_file_info[, file_info_cols], new_file_info[, file_info_cols]) + + # Repeating should give the same result + expect_message( + compute_SBC(datasets_changed, backend_changed, thin_ranks = 2, cache_mode = "results", cache_location = cache_file), + "recompute_SBC_statistics" + ) + new_file_info <- file.info(cache_file) + expect_identical(old_file_info[, file_info_cols], new_file_info[, file_info_cols]) + + # Start fresh again + # Store cache but with keep_fits = FALSE + cache_file2 <- tempfile(fileext = ".rds") + res_first_nokeep <- SBC:::capture_all_outputs( + compute_SBC(datasets, backend, thin_ranks = 1, cache_mode = "results", keep_fits = FALSE, cache_location = cache_file2)) + + expect_false(any(grepl("cache", c(res_first_nokeep$output, res_first_nokeep$messages, res_first_nokeep$warnings)))) + + # Change thin_ranks, but since keep_fits = FALSE should force a refit and update cache + old_file_info <- file.info(cache_file2) + expect_message( + compute_SBC(datasets, backend, thin_ranks = 2, cache_mode = "results", cache_location = cache_file2), + "exists.*keep_fits == FALSE.*recompute" + ) + new_file_info <- file.info(cache_file2) + expect_lt(old_file_info$mtime, new_file_info$mtime) + + # Repeating should give the same result + old_file_info <- file.info(cache_file2) + expect_message( + compute_SBC(datasets, backend, thin_ranks = 2, cache_mode = "results", cache_location = cache_file2), + "loaded from cache" + ) + new_file_info <- file.info(cache_file2) + expect_identical(old_file_info[, file_info_cols], new_file_info[, file_info_cols]) }) diff --git a/tests/testthat/test-results.R b/tests/testthat/test-results.R index a17a8c5..a9e69c2 100644 --- a/tests/testthat/test-results.R +++ b/tests/testthat/test-results.R @@ -1,42 +1,85 @@ test_that("capture_all_outputs", { expect_identical( capture_all_outputs({ - cat("Test") + cat("Test\n") warning("W") message("M", appendLF = FALSE) warning("W2") message("M2", appendLF = FALSE) message("M3", appendLF = FALSE) + + # A special case - silent error + try(stop("Error")) + 14 - }), + }), list(result = 14, messages = c("M", "M2", "M3"), warnings = c("W", "W2"), - output = "Test")) + output = c('Test', 'Error in try(stop("Error")) : Error'))) + + # Nested capture.output + + expect_identical( + capture_all_outputs({ + captured <- capture_all_outputs({ + cat("Test\n") + warning("W") + message("M", appendLF = FALSE) + + # A special case - silent error + try(stop("Error")) + + 28 + + }) + cat("BEFORE\n") + 
message("M_BEFORE", appendLF = FALSE) + warning("W_BEFORE") + try(stop("E_BEFORE")) + reemit_captured(captured) + try(stop("E_AFTER")) + warning("W_AFTER") + message("M_AFTER", appendLF = FALSE) + cat("AFTER\n") + 13 + }), + list(result = 13, + messages = c("M_BEFORE", "M", "M_AFTER"), + warnings = c("W_BEFORE", "W", "W_AFTER"), + output = c('BEFORE', + 'Error in try(stop("E_BEFORE")) : E_BEFORE', + 'Test', + 'Error in try(stop("Error")) : Error', + 'Error in try(stop("E_AFTER")) : E_AFTER', + 'AFTER' + )) + + ) }) test_that("subset_bind", { - res <- SBC_results(stats = data.frame(dataset_id = rep(1:3, each = 4), s = 1:12), + res <- SBC_results(stats = data.frame(sim_id = rep(1:3, each = 4), s = 1:12), fits = list("A", NULL, "C"), outputs = list(c("A1","A2"), c(), c("C1", "C4")), warnings = list(c(), "XXXX", "asdfdaf"), messages = list("aaaa", "ddddd", NA_character_), errors = list(NULL, "customerror", NULL), - default_diagnostics = data.frame(dataset_id = 1:3, qq = rnorm(3)), - backend_diagnostics = data.frame(dataset_id = 1:3, rr = rnorm(3)) + default_diagnostics = data.frame(sim_id = 1:3, qq = rnorm(3)), + backend_diagnostics = data.frame(sim_id = 1:3, rr = rnorm(3)) ) - remove_dataset_id_names <- function(x) { - names(x$stats$dataset_id) <- NULL - names(x$default_diagnostics$dataset_id) <- NULL - names(x$backend_diagnostics$dataset_id) <- NULL + remove_sim_id_names <- function(x) { + names(x$stats$sim_id) <- NULL + names(x$default_diagnostics$sim_id) <- NULL + names(x$backend_diagnostics$sim_id) <- NULL x } - expect_equal(res, remove_dataset_id_names(bind_results(res[1], res[2:3]))) - expect_equal(res, remove_dataset_id_names(bind_results(res[1:2], res[3]))) - expect_equal(remove_dataset_id_names(res[3:1]), remove_dataset_id_names(bind_results(res[3:2], res[1]))) - expect_equal(remove_dataset_id_names(res[2]), remove_dataset_id_names(((res[2:3])[1]))) + expect_equal(res, remove_sim_id_names(bind_results(res[1], res[2:3]))) + expect_equal(res, remove_sim_id_names(bind_results(res[1:2], res[3]))) + expect_equal(remove_sim_id_names(res[3:1]), remove_sim_id_names(bind_results(res[3:2], res[1]))) + expect_equal(remove_sim_id_names(res[2]), remove_sim_id_names(((res[2:3])[1]))) }) test_that("calculate_ranks_draws_matrix works", { @@ -50,13 +93,13 @@ test_that("calculate_ranks_draws_matrix works", { dm[, "d"] <- sample(c(1:5, 1:5)) dm <- posterior::as_draws_matrix(dm) - params <- matrix(c(3.5, -5, 15, 3), nrow = 1) - colnames(params) <- c("a","b","c", "d") + vars <- matrix(c(3.5, -5, 15, 3), nrow = 1) + colnames(vars) <- c("a","b","c", "d") N_steps <- 1e4 all_ranks <- matrix(NA_real_, nrow = N_steps, ncol = 4) for(i in 1:N_steps) { - last_ranks <- calculate_ranks_draws_matrix(params, dm) + last_ranks <- calculate_ranks_draws_matrix(vars, dm) all_ranks[i,] <- last_ranks } @@ -92,8 +135,8 @@ test_that("calculate_sds_draws_matrix", { expect_identical(calculate_sds_draws_matrix(dm), expected_res) }) -test_that("statistics_from_single_fit", { - params <- posterior::as_draws_matrix( +test_that("SBC_statistics_from_single_fit", { + vars <- posterior::as_draws_matrix( posterior::draws_rvars( mu = posterior::rvar(4) , tau = posterior::rvar(4), @@ -101,13 +144,35 @@ test_that("statistics_from_single_fit", { # Can't really check correctness, only # testing that no error is thrown and structure is OK - res <- statistics_from_single_fit(posterior::example_draws(example = "eight_schools"), - parameters = params, thin_ranks = 1, gen_quants = NULL, + test_draws <- posterior::example_draws(example = 
"eight_schools") + res <- SBC_statistics_from_single_fit(test_draws, + variables = vars, thin_ranks = 1, gen_quants = NULL, + ensure_num_ranks_divisor = 1, backend = SBC_backend_mock()) + expect_equal(length(unique(res$max_rank)), 1) + expect_equal(unique(res$max_rank), posterior::ndraws(test_draws)) expect_true(all(res$rank >= 0 & res$rank < res$max_rank)) - expect_equal(res$simulated_value, as.numeric(params)) + expect_equal(res$simulated_value, as.numeric(vars)) expect_identical(res$mean > res$simulated_value, sign(res$z_score) < 0) + # Test ensure_num_ranks_divisor + # Make sure the test draws have the expected size before proceeding + expect_equal(posterior::ndraws(test_draws), 400) + res_ensure2 <- SBC_statistics_from_single_fit(posterior::example_draws(example = "eight_schools"), + variables = vars, thin_ranks = 1, gen_quants = NULL, + ensure_num_ranks_divisor = 2, + backend = SBC_backend_mock()) + # Number of ranks = max_rank + 1 (as 0 is a valid rank) + expect_equal(unique(res_ensure2$max_rank), 399) + + + # Test ensure_num_ranks_divisor, combined with thin_ranks + res_ensure7 <- SBC_statistics_from_single_fit(posterior::example_draws(example = "eight_schools"), + variables = vars, thin_ranks = 4, gen_quants = NULL, + ensure_num_ranks_divisor = 7, + backend = SBC_backend_mock()) + expect_equal(unique(res_ensure7$max_rank), 97) + }) diff --git a/vignettes/DAP_binom_optimization.Rmd b/vignettes/DAP_binom_optimization.Rmd new file mode 100644 index 0000000..53911f9 --- /dev/null +++ b/vignettes/DAP_binom_optimization.Rmd @@ -0,0 +1,524 @@ +--- +title: "DAP Optimization Part 1" +author: "Hyunji Moon, Shinyoung Kim" +output: + html_document: default + pdf_document: default +--- +```{r setup, include=FALSE, warning=FALSE} +library(SBC) +library(cmdstanr) +library(parallel) +library(bayesplot) +library(posterior) +library(dplyr) +library(rstan) +library(future) +library(ggpubr) +library(rstanarm) +library(ggplot2) +library(mclust) +library(plot3D) +library(nloptr) +options(mc.cores = parallel::detectCores()) +plan(multisession) +options(SBC.min_chunk_size = 5) +#set.seed(1984) +``` + + +Given the previous definition of the Data Averaged Posterior(DAP) and MSE decomposition, in this vignette we will investigate the effect off different inference algorithms on DAP and MSE, and how we can try and optimize a given model to have desirable traits with respect to them. + +The Binomial-inverseLogit model we're using in this vignette has a parameter$\eta$ in which $p = \mathrm{inverse\_logit}(\eta)$ is used as the success probability. The vignette is largely separated into 3 parts: + +1. Given a fixed parameterization, investigate the behavior of DAP with respect to inference algorithm. +2. Given a fixed inference algorithm, investigate the behavior of DAP with respect to parameterization. +3. Given a fixed distribution family for $eta$, find the hyperparameters which minimizes the MSE. 
+ +```{R, warning=FALSE, error=FALSE} +## Generator settings +# number of SBC simulations per iteration (generator) +nsims <- 100 + +# number of observations +nobs <- 10#2 + +# link function (1 = logit, 2 = probit, 3 = cloglog) +link <- 1 + +# number of binomial trials per observation +nsize <- 10 + +## Backend settings +# number of draws per posterior approximation +ndraws <- 1000 + +# number of chains for hmc posterior approximation +nchains <- 2 + +fixed_args_binom <- list(nobs = nobs, nsize = nsize, link_type = 1, nsims = nsims, ndraws = ndraws, dist_types=list(eta="normal")) +``` + +# Inspecting DAP of binom-laplace +The generator function is used to draw simulated parameter samples and data which are then used to fit the actual model. + +```{R, warning=FALSE, error=FALSE} +generator_binom <- function(lambdas, fixed_args){ + # fixed value across simulated datasets + # experiment settings + nobs <- fixed_args$nobs + nsize <- fixed_args$nsize + dist_types <- fixed_args$dist_types + # modular settings + link_type <- fixed_args$link_type + + # generate + lambda_arg1 <- c() + lambda_arg2 <- c() + if(dist_types$eta == "normal"){ + eta <- rnorm(1, mean = lambdas$eta$mu, sd=lambdas$eta$sigma) + lambda_arg1 <- c(lambda_arg1, lambdas$eta$mu) + lambda_arg2 <- c(lambda_arg2, lambdas$eta$sigma) + } + else if(dist_types$eta == "gamma"){ + eta <- rgamma(1, shape = lambdas$eta$alpha, rate = lambdas$eta$beta) + lambda_arg1 <- c(lambda_arg1, lambdas$eta$alpha) + lambda_arg2 <- c(lambda_arg2, lambdas$eta$beta) + } + + + + mu <- invtf_param_vec(eta, link_type = link_type) + Y <- rbinom(nobs, size = nsize, prob = mu) + list( + parameters = list(eta = eta), + generated = list(nobs= nobs, nsize = nsize, link = link_type, + dist_types = match(unlist(dist_types), c("normal", "gamma")), lambda_arg1 = lambda_arg1, lambda_arg2 = lambda_arg2, + Y = Y) + ) +} +``` + +In short, DAP pools together all samples from each fits and calculates the summary statistics, which are used as the "DAP parameter". In this cell, we define the function which pools the parameter draws together, as well as calculating the bias and variance of the DAP estimate. + +Note that we can either set the prior of the parameter of interest to either normal or gamma. In this example we will stick with normal. 
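+
+In equations (a sketch in our own notation, mirroring `generator_binom()` above and the bookkeeping in `calculate_dap()` below): with the normal prior and the logit link, each simulation draws
+
+$$
+\eta \sim \mathrm{normal}(\mu, \sigma^2), \qquad p = \mathrm{inverse\_logit}(\eta), \qquad Y_n \sim \mathrm{Binomial}(\mathrm{nsize}, p),
+$$
+
+and with $S$ simulations and $M$ posterior draws per fit, writing $\tilde{\eta}_{ij}$ for the $j$-th posterior draw of fit $i$, $\bar{\tilde{\eta}}_i$ for its mean and $\bar{\eta}$ for the mean of the prior draws, the reported bias and variance terms are
+
+$$
+B = \frac{1}{S}\sum_{i=1}^{S} \bar{\tilde{\eta}}_i - \bar{\eta}, \qquad
+V = \frac{1}{S M}\sum_{i=1}^{S}\sum_{j=1}^{M} \left(\tilde{\eta}_{ij} - \bar{\tilde{\eta}}_i\right)^2,
+$$
+
+while the "DAP parameter" itself is simply the mean and variance of all $S M$ pooled draws.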
+ +```{R, warning=FALSE, error=FALSE} +# initial prior hyperparameters +lambda_init_binom <- list( + eta = list(mu=100, sigma=100) +) +datasets_binom <- generate_datasets(SBC_generator_function(generator_binom, lambda_init_binom, fixed_args_binom), n_datasets = fixed_args_binom$nsims) + +# hyperparameter update algorithm +updator = "mc_update" + +# maximal number of SBC iterations +niter <- 100 + +# tolerance +tol <- 0.1 + +# learning rate +gamma <- 1.5 # 0.5 for gradient update, 10 for normal_str_update + +# step2: inferring posterior +rstan_binom_mod <- stan_model("models/binom-laplace.stan") +cmdstan_binom_mod <- cmdstanr::cmdstan_model("models/binom-laplace.stan") + +backend_binom_opt <- SBC_backend_rstan_optimizing(rstan_binom_mod, draws = ndraws) +backend_binom_hmc <- SBC_backend_cmdstan_sample(cmdstan_binom_mod, chains = 4, iter_sampling = ndraws / 4) # thin = 10 +calib_generator <- function(lambdas, fixed_args){ + generate_datasets(SBC_generator_function(generator_binom, lambdas, fixed_args), n_datasets = fixed_args$nsims) +} + +calculate_dap <- function(mu, var, generator, datasets=NULL, backened, fixed_args){ + if(is.null(datasets)){ + lambda_init_binom <- list( + eta = list(mu=mu, sigma=sqrt(var)) + ) + datasets <- do.call(generator, list(lambda_init_binom, fixed_args = fixed_args)) + } + sbc_result <- compute_results(datasets, backened, thin_ranks = 1) + draws_eta <- c() + draws_Y <- c() + prior_thetas <- posterior::extract_variable(datasets$parameters, "eta") + var_theta_tilde_bar <- 0 + var_theta_tilde <- 0 + theta_bar <- mean(prior_thetas) + + B <- 0 + V <- 0 + for(i in 1:nsims){ + draws_Y <- c(draws_Y, datasets$generated[[i]]$Y) + samples <- SBC_fit_to_draws_matrix(sbc_result$fits[[i]]) + etas <- posterior::extract_variable(samples, "eta") + draws_eta <- c(draws_eta, etas) + + var_theta_tilde_bar <- var_theta_tilde_bar + (mean(etas) - mean(prior_thetas))^2 + var_theta_tilde <- var_theta_tilde + sum((etas - mean(etas))^2) + + B <- B + mean(etas) + V <- V + sum((etas - mean(etas))^2) + } + + var_theta_tilde_bar <- var_theta_tilde_bar / fixed_args$nsims + var_theta_tilde <- var_theta_tilde / (fixed_args$nsims * ndraws) + + B <- B / fixed_args$nsims - mean(prior_thetas) + V <- V / (fixed_args$nsims * ndraws) + + # assume normal for dap + mu <- mean(draws_eta) + var <- sd(draws_eta)^2 + #var <- 100 + + return(list(mu=mu, var=var, draws_eta=draws_eta, draws_Y=draws_Y, B=B, V=V, datasets=datasets)) +} +``` + +## + +Since we're trying to investigate the structure of DAP for multiple hyperparameter values, we will do a naive "grid search" where we fit the model with various hyperparameter values arranged in a grid. + +Once we have the model and DAP generation ready, we run inference each for HMC and optimization. The hypothesis is that HMC will be less sensitive to the designated prior, and hyperparameters retrieved from the DAP should match the specified hyperparameter. On the other hand, optimization should yield more interesting results. + +For this example, we've parameterized $\eta$ as $\eta \sim \mathrm{normal}(\mu, \sigma^2)$. 
We then run inference with different combinations of the hyperparameters: + +```{r, cache=TRUE} +gridsize_mu <- 5 +gridsize_var <- 5 +mu_seq <- seq(10, 50, length.out = gridsize_mu) +var_seq <- seq(5, 50, length.out = gridsize_var) +grid_size <- length(mu_seq) * length(var_seq) + +squared_lambda_diff <- array(rep(NA, grid_size), dim = c(gridsize_mu, gridsize_var)) +rownames(squared_lambda_diff) <- mu_seq +colnames(squared_lambda_diff) <- var_seq + +lambda_diff <- array(rep(NA, grid_size), dim = c(gridsize_mu, gridsize_var)) +rownames(lambda_diff) <- mu_seq +colnames(lambda_diff) <- var_seq + +dap_lambda_mu <- array(rep(NA, grid_size), dim = c(gridsize_mu, gridsize_var)) +dap_lambda_var <- array(rep(NA, grid_size), dim = c(gridsize_mu, gridsize_var)) +rownames(dap_lambda_mu) <- mu_seq +rownames(dap_lambda_var) <- mu_seq +colnames(dap_lambda_mu) <- var_seq +colnames(dap_lambda_var) <- var_seq + +lambda_mu <- array(rep(NA, grid_size), dim = c(gridsize_mu, gridsize_var)) +lambda_var <- array(rep(NA, grid_size), dim = c(gridsize_mu, gridsize_var)) +rownames(lambda_mu) <- mu_seq +rownames(lambda_var) <- mu_seq +colnames(lambda_mu) <- var_seq +colnames(lambda_var) <- var_seq + +var_theta_tilde_bar <- array(rep(NA, grid_size), dim = c(gridsize_mu, gridsize_var)) +var_theta_tilde <- array(rep(NA, grid_size), dim = c(gridsize_mu, gridsize_var)) +rownames(var_theta_tilde_bar) <- mu_seq +rownames(var_theta_tilde_bar) <- mu_seq +colnames(var_theta_tilde) <- var_seq +colnames(var_theta_tilde) <- var_seq + +B_ghat <- array(rep(NA, grid_size), dim = c(gridsize_mu, gridsize_var)) +V_ghat <- array(rep(NA, grid_size), dim = c(gridsize_mu, gridsize_var)) +rownames(B_ghat) <- mu_seq +rownames(V_ghat) <- mu_seq +colnames(B_ghat) <- var_seq +colnames(V_ghat) <- var_seq + +B_g <- array(rep(NA, grid_size), dim = c(gridsize_mu, gridsize_var)) +V_g <- array(rep(NA, grid_size), dim = c(gridsize_mu, gridsize_var)) +rownames(B_g) <- mu_seq +rownames(V_g) <- mu_seq +colnames(B_g) <- var_seq +colnames(V_g) <- var_seq + +dap_lambda_mu_hmc <- array(rep(NA, grid_size), dim = c(gridsize_mu, gridsize_var)) +dap_lambda_var_hmc <- array(rep(NA, grid_size), dim = c(gridsize_mu, gridsize_var)) +rownames(dap_lambda_mu_hmc) <- mu_seq +rownames(dap_lambda_var_hmc) <- mu_seq +colnames(dap_lambda_mu_hmc) <- var_seq +colnames(dap_lambda_var_hmc) <- var_seq + + + +theta_bar <- array(rep(NA, grid_size), dim = c(gridsize_mu, gridsize_var)) +rownames(theta_bar) <- mu_seq +rownames(theta_bar) <- mu_seq + +generated_Y <- array(rep(NA, nsims * nobs * grid_size), dim=c(nsims * nobs, gridsize_mu, gridsize_var) ) +dimnames(generated_Y)[[2]] <- mu_seq +dimnames(generated_Y)[[3]] <- var_seq + +for(j in 1:length(var_seq)){ + for(i in 1:length(mu_seq)){ + dap <- calculate_dap(mu_seq[[i]],var_seq[[j]] , calib_generator, NULL, backend_binom_opt, fixed_args_binom) + squared_lambda_diff[i,j] <- sqrt((mu_seq[[i]] - dap$mu)^2 + (var_seq[[j]] - dap$var)^2) + lambda_diff[i,j] <- (dap$mu - mu_seq[[i]]) + (dap$var - var_seq[[j]]) + dap_lambda_mu[i, j] <- dap$mu + dap_lambda_var[i, j] <- dap$var + lambda_mu[i, j] <- mu_seq[[i]] + lambda_var[i, j] <- var_seq[[j]] + generated_Y[, i, j] <- dap$draws_Y + B_ghat[i, j] <- dap$B + V_ghat[i, j] <- dap$V + + dap_hmc <- calculate_dap(mu_seq[[i]],var_seq[[j]], calib_generator, dap$datasets, backend_binom_hmc, fixed_args_binom) + B_g[i, j] <- dap_hmc$B + V_g[i, j] <- dap_hmc$V + dap_lambda_mu_hmc[i, j] <- dap_hmc$mu + dap_lambda_var_hmc[i, j] <- dap_hmc$var + } +} +``` + + + +```{R} +scaleFUN <- function(x) 
sprintf("%.2f", x)
+dap_df <- as.data.frame.table(dap_lambda_mu)
+colnames(dap_df)[1] <- "mu"
+colnames(dap_df)[2] <- "var"
+dap_df[, "mu"] <- as.numeric(as.vector(dap_df[, "mu"])) # not using as.vector converts the factor indices
+dap_df[, "var"] <- as.numeric(as.vector(dap_df[, "var"]))
+colnames(dap_df)[3] <- "dap_mu"
+dap_lambda_df <- as.data.frame.table(dap_lambda_var)
+dap_df[, "dap_var"] <- as.numeric(dap_lambda_df$Freq)
+ggplot(dap_df, mapping=aes(x="mu", y="var")) + geom_point(aes(x=mu, y=var), color = "red") + geom_point(aes(x=dap_mu, y = dap_var), color = "blue") + scale_y_continuous(labels=scaleFUN) + geom_segment(aes(x=mu, y=var, xend=dap_mu, yend=dap_var), size=0.2) + xlab("mu") + ylab("var") + ggtitle("binom + opt, nsims=100")
+```
+
+The red points denote the initially specified hyperparameter values. Blue points denote the hyperparameter values recovered from the computed DAP.
+
+We can see that for optimization, almost all initialization points shrink to near $(0, 0)$. This means that the hyperparameter region we have tested is severely miscalibrated.
+
+
+```{R}
+scaleFUN <- function(x) sprintf("%.2f", x)
+dap_df <- as.data.frame.table(dap_lambda_mu_hmc)
+colnames(dap_df)[1] <- "mu"
+colnames(dap_df)[2] <- "var"
+dap_df[, "mu"] <- as.numeric(as.vector(dap_df[, "mu"])) # not using as.vector converts the factor indices
+dap_df[, "var"] <- as.numeric(as.vector(dap_df[, "var"]))
+colnames(dap_df)[3] <- "dap_mu"
+dap_lambda_df <- as.data.frame.table(dap_lambda_var_hmc)
+dap_df[, "dap_var"] <- as.numeric(dap_lambda_df$Freq)
+ggplot(dap_df, mapping=aes(x="mu", y="var")) + geom_point(aes(x=mu, y=var), color = "red") + geom_point(aes(x=dap_mu, y = dap_var), color = "blue") + scale_y_continuous(labels=scaleFUN) + geom_segment(aes(x=mu, y=var, xend=dap_mu, yend=dap_var), size=0.2) + xlab("mu") + ylab("var") + ggtitle("binom + hmc, nsims=100")
+```
+
+The two plots look suspiciously similar. To check, let's compare the actual recovered hyperparameter values. We will look at the mean hyperparameter values:
+optimization:
+```{R}
+dap_lambda_mu
+```
+
+HMC:
+```{R}
+dap_lambda_mu_hmc
+```
+
+
+We now plot the DAP bias-variance decomposition with respect to the hyperparameters, varying the inference algorithm. The goal here is to observe whether HMC provides better results than optimization, as per the hypothesis, and to find any systematic anomalies in each quantity.
+
+
+```{R, fig.width = 14}
+B_g_df <- as.data.frame.table(B_g)
+colnames(B_g_df)[colnames(B_g_df) == "Var1"] <- "Mu"
+colnames(B_g_df)[colnames(B_g_df) == "Var2"] <- "Var"
+colnames(B_g_df)[colnames(B_g_df) == "Freq"] <- "B_g"
+
+B_ghat_df <- as.data.frame.table(B_ghat)
+colnames(B_ghat_df)[colnames(B_ghat_df) == "Var1"] <- "Mu"
+colnames(B_ghat_df)[colnames(B_ghat_df) == "Var2"] <- "Var"
+colnames(B_ghat_df)[colnames(B_ghat_df) == "Freq"] <- "B_ghat"
+
+B_ghat_df[, "B_g"] <- B_g_df[, "B_g"]
+
+B_ghat_df[, "coords"] <- paste(B_ghat_df[, "Mu"], B_ghat_df[, "Var"], sep=",")
+B_ghat_df[, "coords"] <- factor(B_ghat_df$coords, levels=unique(B_ghat_df$coords))
+
+ggplot(B_ghat_df) + geom_point(aes(x=coords, y=B_g, color="red")) + ggtitle("B(red=g(hmc), green=ghat(opt))") + geom_point(aes(x=coords, y=B_ghat, color="green")) + geom_segment(aes(x=coords, y=B_g, xend=coords, yend=B_ghat), size=0.2) + theme(axis.title.x = element_text(margin=unit(c(10, 0, 0, 0), "mm"))) + xlab("mu, var") + ylab("B") + theme(legend.position = "none")
+
+```
+
+For bias, we can see that in general, bias increases with the mu hyperparameter. 
The plot is admittedly hard to read, but the mu hyperparameter is the main factor here when it comes to bias. We can see that the variance hyperparameter barely affects bias.
+
+
+```{R, fig.width = 14}
+V_g_df <- as.data.frame.table(V_g)
+colnames(V_g_df)[colnames(V_g_df) == "Var1"] <- "Mu"
+colnames(V_g_df)[colnames(V_g_df) == "Var2"] <- "Var"
+colnames(V_g_df)[colnames(V_g_df) == "Freq"] <- "V_g"
+
+V_ghat_df <- as.data.frame.table(V_ghat)
+colnames(V_ghat_df)[colnames(V_ghat_df) == "Var1"] <- "Mu"
+colnames(V_ghat_df)[colnames(V_ghat_df) == "Var2"] <- "Var"
+colnames(V_ghat_df)[colnames(V_ghat_df) == "Freq"] <- "V_ghat"
+
+V_ghat_df[, "V_g"] <- V_g_df[, "V_g"]
+
+V_ghat_df[, "coords"] <- paste(V_ghat_df[, "Mu"], V_ghat_df[, "Var"], sep="|")
+V_ghat_df[, "coords"] <- factor(V_ghat_df$coords, levels=unique(V_ghat_df$coords))
+
+ggplot(V_ghat_df) + geom_point(aes(x=coords, y=V_g, color="red")) + ggtitle("V(red=g(hmc), green=ghat(opt))") + geom_point(aes(x=coords, y=V_ghat, color="green")) + geom_segment(aes(x=coords, y=V_g, xend=coords, yend=V_ghat), size=0.2) + theme(axis.title.x = element_text(margin=unit(c(10, 0, 0, 0), "mm"))) + xlab("mu, var") + ylab("V") + theme(legend.position = "none")
+
+```
+
+However, the variance component is a bit more interesting. We first observe that the variance component is inversely proportional to the variance hyperparameter: when the variance hyperparameter is at its smallest value, 5, the variance component is at its largest. Unlike the bias component, the mean hyperparameter doesn't have much impact here.
+
+
+We now run posterior predictive checks with the intent of verifying whether the fits can recover the initial hyperparameters. Let's select hyperparameter values that we know are problematic and run PPC:
+
+```{R}
+# mu = 0, 5, 20, 40
+# var = 1, 5, 15
+test_mu <- 40
+test_var <- 15
+lambda_test <- list(
+  eta = list(mu=test_mu, sigma=sqrt(test_var))
+)
+test_dataset <- generate_datasets(SBC_generator_function(generator_binom, lambda_test, fixed_args_binom), n_datasets = fixed_args_binom$nsims)
+sbc_result <- compute_results(test_dataset, backend_binom_opt, thin_ranks = 1)
+
+#plot_ecdf_diff(sbc_result)
+#plot_rank_hist(sbc_result)
+#plot_sim_estimated(sbc_result)
+# plot_sim_estimated(sbc_result, estimate="median")
+test_eta <- c()
+fit_matrix <- matrix(data=rep(NA, nsims * ndraws), nrow = nsims, ncol = ndraws)
+for(i in 1:nsims){
+  samples <- sbc_result$fits[[i]]
+  draws <- SBC_fit_to_draws_matrix(samples)
+  eta_vals <- posterior::extract_variable(draws, "eta")
+  fit_matrix[i, ] <- eta_vals
+  test_eta <- c(test_eta, eta_vals)
+}
+
+x <- seq(-10, 40, length.out=10000)
+eta_rep <- rnorm(ndraws, test_mu, sqrt(test_var))
+ppc_dens_overlay(eta_rep, fit_matrix)
+
+```
+
+
+As expected, the results show that the miscalibration is severe; the posterior data distribution is not in agreement with the initial parameters.
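+
+The same mismatch can also be read off numerically (a quick sketch; it reuses `test_eta`, `test_mu` and `test_var` from the chunk above):
+
+```{R, eval=FALSE}
+# Compare the prior that generated the data with the pooled posterior draws.
+c(prior_mean = test_mu, prior_sd = sqrt(test_var),
+  pooled_posterior_mean = mean(test_eta), pooled_posterior_sd = sd(test_eta))
+```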
+ +Let's try some region that might perform a bit better: + +```{R} +# mu = 0, 5, 20, 40 +# var = 1, 5, 15 +test_mu <- 1 +test_var <- 5 +lambda_test <- list( + eta = list(mu=test_mu, sigma=sqrt(test_var)) +) +test_dataset <- generate_datasets(SBC_generator_function(generator_binom, lambda_test, fixed_args_binom), n_datasets = fixed_args_binom$nsims) +sbc_result <- compute_results(test_dataset, backend_binom_opt, thin_ranks = 1) + +#plot_ecdf_diff(sbc_result) +#plot_rank_hist(sbc_result) +#plot_sim_estimated(sbc_result) +# plot_sim_estimated(sbc_result, estimate="median") +test_eta <- c() +fit_matrix <- matrix(data=rep(NA, nsims * ndraws), nrow = nsims, ncol = ndraws) +for(i in 1:nsims){ + samples <- sbc_result$fits[[i]] + draws <- SBC_fit_to_draws_matrix(samples) + eta_vals <- posterior::extract_variable(draws, "eta") + fit_matrix[i, ] <- eta_vals + test_eta <- c(test_eta, eta_vals) +} + +x <- seq(-10, 40, length.out=10000) +eta_rep <- rnorm(ndraws, test_mu, sqrt(test_var)) +ppc_dens_overlay(eta_rep, fit_matrix) + +``` + +```{R, eval=FALSE} + +normal_pdf <- dnorm(x, test_mu, sqrt(test_var)) +eta_draws <- sbc_result$fits[[5]]$fit_list$theta_tilde[, "eta"] +#ggplot(pdf_df) + geom_line(aes(x=x, y=normal_pdf, colour="red")) + geom_density(data=data.frame(val=eta_draws), aes(val, color="blue")) +``` + + +```{R, eval=FALSE} +# maximize entropy +# partial derivative is: -log p_w - 1 +calc_entropy <- function(p){ + return(as.numeric((p + 1e-8) %*% -log(p + 1e-8)) - 100 * (sum(p) - 1)^2) +} + +calc_entropy_grad <- function(p){ + return(-log(p + 1e-8) - 1 - 200 * (p + 1e-8)) +} + +# equal to minimize negative entropy +neg_entropy <- function(p){ + return(-calc_entropy(p)) +} + +neg_entropy_grad <- function(p){ + return(-calc_entropy_grad(p)) +} + +theta <- 14 +# partial derivative is contraction_arr_{w} +p_v_product_constraint <- function(p){ + # equality constraint should equal 0 + return(as.numeric(p %*% as.vector(contraction_arr)) - 6) +} + +p_v_product_constraint_grad <- function(p){ + return(as.vector(contraction_arr)) +} + +# calculate theta and p_init +# start at uniform +p_init <- as.numeric(c(rep(1e-8, grid_size - 1), 1)) + +prob_ineq_constraints <- function(p){ + # constraints <- c(p_v_product_constraint(p), probability_simplex_constraint(p)) + # grad <- c(p_v_product_constraint_grad(p), probability_simplex_constraint_grad(p)) + constraints <- c(p_v_product_constraint(p)) + grad <- c(p_v_product_constraint_grad(p)) + return( list(constraints=constraints, jacobian=grad) ) +} + +#If you want to use equality constraints, then you should use one of these algorithms NLOPT_LD_AUGLAG, NLOPT_LN_AUGLAG, NLOPT_LD_AUGLAG_EQ, NLOPT_LN_AUGLAG_EQ, NLOPT_GN_ISRES, NLOPT_LD_SLSQP + +probability_simplex_opts <- list(algorithm="NLOPT_LD_SLSQP", print_level=3, xtol_rel=1e-8, maxeval=1000) +prob_lb <- rep(0.0, length(p_init)) +prob_ub <- rep(1.0, length(p_init)) +prob_opt <- nloptr::nloptr(p_init, + neg_entropy, + eval_grad_f = neg_entropy_grad, + eval_g_ineq = prob_ineq_constraints, + opts = probability_simplex_opts, + lb=prob_lb, ub=prob_ub) +``` + +```{R, eval=FALSE} + +result_out <- array(prob_opt$solution, dim = c(length(mu_seq), length(var_seq))) +rownames(result_out) <- mu_seq +colnames(result_out) <- var_seq +persp3D(x=mu_seq, y=var_seq, z = result_out, theta=55, phi=10, xlab="mu", ylab="var", zlab="prob_opt_result", ticks=5, ticktype="detailed") +persp3D(x=mu_seq, y=var_seq, z = contraction_arr, theta=25, phi=10, xlab="mu", ylab="var", zlab="convex if ", ticks=5, ticktype="detailed") +``` + +```{R, 
eval=FALSE}
+N = 10000
+sampled_probs <- sample(1:grid_size, N, prob=as.vector(result_out), replace=TRUE)
+mu <- rep(mu_seq, length(var_seq))
+sigma <- rep(var_seq, each=length(mu_seq))
+combined <- data.frame(mu=mu, sigma=sigma)
+thetas <- c()
+for(i in 1:N){
+  thetas[i] <- rnorm(1, combined[sampled_probs[i], "mu"], combined[sampled_probs[i], "sigma"])
+}
+hist(thetas, probability=TRUE, breaks=30)
+```
diff --git a/vignettes/DAP_gamma.Rmd b/vignettes/DAP_gamma.Rmd
new file mode 100644
index 0000000..b7af16f
--- /dev/null
+++ b/vignettes/DAP_gamma.Rmd
@@ -0,0 +1,256 @@
+---
+title: "DAP operator inspection - gamma"
+author: "Hyunji Moon"
+output:
+  html_document: default
+  pdf_document: default
+---
+```{r setup, include=FALSE, warning=FALSE}
+library(SBC)
+library(cmdstanr)
+library(parallel)
+library(bayesplot)
+library(posterior)
+library(dplyr)
+library(rstan)
+library(future)
+library(ggpubr)
+library(rstanarm)
+library(ggplot2)
+library(mclust)
+library(plot3D)
+options(mc.cores = parallel::detectCores())
+options(SBC.min_chunk_size = 5)
+set.seed(1984)
+```
+
+We introduce a self-calibration algorithm. First, the DAP operator $T: P \rightarrow P$ is defined as $\hat{g}(f(\Phi X, U), \Phi X)$. We restrict attention to the normal case to inspect its convexity and contraction characteristics.
+
+```{R}
+## Generator settings
+# number of SBC simulations per iteration (generator)
+nsims <- 200
+
+# number of observations
+nobs <- 10
+
+# link function (1 = logit, 2 = probit, 3 = cloglog)
+link <- 1
+
+# number of binomial trials per observation
+nsize <- 10
+
+## Backend settings
+# number of draws per posterior approximation
+ndraws <- 100
+
+# number of chains for hmc posterior approximation
+nchains <- 2
+```
+
+# Inspecting DAP of binom-laplace
+Suppose
+A1. $|x - T(x)|$ is convex, and
+A2. $T$ is a contraction: $|T(x) - T(T(x))| < |x - T(x)|$.
+Then, for the damped update $B(x) = px + qT(x)$ with $p + q = 1$,
+$$|B(x) - T(B(x))| \le p\,|x - T(x)| + q\,|T(x) - T(T(x))| \;\;\text{(A1)}\;\; < \;|x - T(x)| \;\;\text{(A2)},$$
+so the gap decreases at every step. We empirically inspect this on the Gamma-regression case.
+- convex (global) vs continuous (wiggly, can )
+For $f^{-1}([0,\epsilon])$ to be compact, $f$ needs to be convex.
+
+A1 is about the blackbox T (reward).
+A2 is about the quality of our update T (action). 
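+
+A toy numerical illustration of the damped-update argument (ours, not part of the analysis below; `T_map` is a made-up contraction, not the DAP operator):
+
+```{R, eval=FALSE}
+# For a contraction T and damping weights p + q = 1, the gap |x - T(x)|
+# shrinks under the damped update B(x) = p*x + q*T(x).
+T_map <- function(x) 0.5 * x + 1   # toy contraction with fixed point 2
+p <- 0.7
+q <- 1 - p
+x <- 10
+for (k in 1:5) {
+  cat(sprintf("iter %d: gap = %.4f\n", k, abs(x - T_map(x))))
+  x <- p * x + q * T_map(x)   # damped update
+}
+```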
+ +# Inspecting DAP of gamma-regression-hmc +## Goal: CRP convex set (2d conditioned from 4d) construction + +````{R} +generator_gr <- function(lambdas, fixed_args){ + # fixed value across simulated datasets + ## meta + nobs <- fixed_args$nobs + K <- fixed_args$K + dist_types <- fixed_args$dist_types + while(TRUE){ + # predictor + X <- array(rnorm(nobs * K, mean = 1, sd = 0.5), dim = c(nobs, K)) + b <- rnorm(K, mean = 0, sd = 1) + # generate + lambda_arg1 <- c() + lambda_arg2 <- c() + if(dist_types$shape == "normal"){ + shape <- rnorm(1, mean = lambdas$shape$mu, sd=lambdas$shape$sigma) + lambda_arg1 <- c(lambda_arg1, lambdas$shape$mu) + lambda_arg2 <- c(lambda_arg2, lambdas$shape$sigma) + } + else if(dist_types$shape == "gamma"){ + shape <- rgamma(1, shape = lambdas$shape$alpha, rate = lambdas$shape$beta) + lambda_arg1 <- c(lambda_arg1, lambdas$shape$alpha) + lambda_arg2 <- c(lambda_arg2, lambdas$shape$beta) + } + else if(dist_types$shape == "lognormal"){ + shape <- rlnorm(1, meanlog = lambdas$shape$mu, sdlog = lambdas$shape$sigma) + lambda_arg1 <- c(lambda_arg1, lambdas$shape$mu) + lambda_arg2 <- c(lambda_arg2, lambdas$shape$sigma) + } + + if(dist_types$a == "normal"){ + a <- rnorm(1, mean = lambdas$a$mu, sd=lambdas$a$sigma) + lambda_arg1 <- c(lambda_arg1, lambdas$a$mu) + lambda_arg2 <- c(lambda_arg2, lambdas$a$sigma) + } + #a <- rnorm(1, mean = 2, sd = 5) + logmu <- as.numeric(a + X %*% b) + mu <- exp(logmu) + Y <- rgamma(nobs, shape = shape, rate = shape / mu) + + if(!any(Y <= 1e-32)){ + return(list( + parameters = list(shape = shape), + generated = list(nobs= nobs, K = K, X = X, dist_types = match(unlist(dist_types), c("normal", "gamma")), + lambda_arg1 = lambda_arg1, lambda_arg2 = lambda_arg2, Y = Y) + ) + ) + } + } +} + +fixed_args_gr <- list(nobs = nobs, K = 15, nsims = nsims, dist_types=list(shape="normal", a="normal")) +cmdstan_mod_gr <- cmdstanr::cmdstan_model("models/gamma-reg.stan") +rstan_mod_gr <- stan_model("models/gamma-reg.stan") +backend_gr_opt <- SBC_backend_rstan_optimizing(rstan_mod_gr, draws = ndraws) +backend_gr_hmc <- SBC_backend_cmdstan_sample(cmdstan_mod_gr, chains = nchains, iter_sampling = ndraws / nchains) + +# combines target hp values with other hyperparameter settings +calib_generator <- function(lambda_init_gamma, fixed_args){ + generate_datasets(SBC_generator_function(generator_gr, lambda_init_gamma, fixed_args), n_datasets = fixed_args_gamma$nsims) +} +``` + +```{R, warning = FALSE, error = FALSE, dap with two hp} +calculate_dap <- function(mu, var, generator, backened, fixed_args){ + lambda_init_gamma <- list( + shape = list(mu= mu, sigma = sqrt(var)), #list(alpha= mu^2 / var, beta= mu / var), + a = list(mu= 2, sigma = 1) + ) + datasets <- do.call(generator, list(lambda_init_gamma, fixed_args = fixed_args)) + sbc_result <- compute_results(datasets, backened, thin_ranks = 1) + draws_eta <- c() + for(fit in sbc_result$fits){ + samples <- SBC_fit_to_draws_matrix(fit) + draws_eta <- c(draws_eta, posterior::extract_variable(samples, "shape")) + } + # assume normal for dap + mu <- mean(draws_eta) + var <- sd(draws_eta)^2 + # gamma_est <- MASS::fitdistr(draws_eta, "gamma", start=list(shape=1, rate=1))$estimate + # alpha <- as.numeric(gamma_est["shape"]) + # beta <- as.numeric(gamma_est["rate"]) + # mu = alpha / beta + # var = alpha / beta^2 + return(list(mu=mu, var=var, draws_eta=draws_eta)) +} +true_mu <- 20 +true_var <- 2 +mu_vals <- true_mu + seq(-10, 10, length.out = 6) +var_vals <- true_var + seq(-1, 1, length.out = 3) +results <- rep(NA, length(mu_vals)) 
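+# The matrices defined below record, for each (mu, var) grid point (a sketch of the intent,
+# matching the assignments in the loop further down):
+#   calib       - |x - T(x)|^2, squared distance between the hyperparameters and their DAP image
+#   real        - squared distance to the reference point (50, 5)
+#   convex      - calib + real, used to eyeball convexity (A1)
+#   contraction - |x - T(x)|^2 - |T(x) - T(T(x))|^2, positive when T contracts (A2)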
+dap_mu <- rep(NA, length(mu_vals)) + +# (x-T(x)^2 +calib <-array(rep(NA, length(mu_vals) * length(var_vals)), dim = c(length(mu_vals), length(var_vals))) +# (x-r)^2 +real <- array(rep(NA, length(mu_vals) * length(var_vals)), dim = c(length(mu_vals), length(var_vals))) +# convex objective +convex <- array(rep(NA, length(mu_vals) * length(var_vals)), dim = c(length(mu_vals), length(var_vals))) +# contraction of T i.e. |T(x)-T(T(x))| < |x-T(x)| with different norms +contraction <- array(rep(NA, length(mu_vals) * length(var_vals)), dim = c(length(mu_vals), length(var_vals))) + +colnames(calib) <- var_vals +rownames(calib) <- mu_vals +colnames(real) <- var_vals +rownames(real) <- mu_vals +colnames(convex) <- var_vals +rownames(convex) <- mu_vals +colnames(contraction) <- var_vals +rownames(contraction) <- mu_vals + +for(i in 1:length(mu_vals)){ + for(j in 1:length(var_vals)){ + dap <- calculate_dap(mu_vals[[i]],var_vals[[j]] , calib_generator, backend_gr_opt, fixed_args_gr) + dapdap <- calculate_dap(dap$mu, dap$var, calib_generator, backend_gr_opt, fixed_args_gr) + calib[i,j] <- (mu_vals[[i]] - dap$mu)^2 + (var_vals[[j]] - dap$var)^2 + real[i,j] <- (mu_vals[[i]] - 50)^2 + (var_vals[[i]] - 5)^2 + convex[i,j] <- calib[i,j] + real[i,j] + contraction[i,j] <- (mu_vals[[i]] - dap$mu)^2 + (var_vals[[j]] - dap$var)^2 - ((dap$mu- dapdap$mu)^2 + (dap$var - dapdap$var)^2) + } +} + +persp3D(x=mu_vals, y=var_vals, z = convex, theta=55, phi=10, xlab="mu", ylab="sigma", zlab="convex if ", ticks=5, ticktype="detailed") +persp3D(x=mu_vals, y=var_vals, z = contraction, theta=55, phi=10, xlab="mu", ylab="var", zlab="contraction", ticks=5, ticktype="detailed") +``` +### 2. Condition on one hyperparameter (sigma) +calc_dap return sigma as always fixed value (a~ gibbs sampler) shape mu vs intercept mu +p3) sigma 1 +p4) sigma 0.1 +```{R, warning = FALSE, error = FALSE} +calculate_dap <- function(shape_mu, a_mu, generator, backened, fixed_args){ + # lambda_init_gamma <- list( + # shape = list(alpha= shape_mu^2 / 1^2, beta= shape_mu / 1^2), + # #a = list(alpha= a_mu^2 / 1^2, beta= a_mu / 1^2) + # a = list(mu=a_mu, sigma=1) + lambda_init_gamma <- list( + #shape=list(alpha=2, beta=1), + shape = list(mu = shape_mu, sigma=1), + a = list(mu = a_mu, sigma=1) + ) + datasets <- do.call(generator, list(lambda_init_gamma, fixed_args = fixed_args)) + sbc_result <- compute_results(datasets, backened, thin_ranks = 1) + draws_shape <- c() + draws_a <- c() + for(fit in sbc_result$fits){ + samples <- SBC_fit_to_draws_matrix(fit) + draws_shape <- c(draws_shape, posterior::extract_variable(samples, "shape")) ### TODO + draws_a <- c(draws_a, posterior::extract_variable(samples, "a")) + } + # assume fixed varss for dap + + #gamma_est <- MASS::fitdistr(draws_shape, "gamma", start=list(shape=1, rate=1))$estimate + #alpha <- as.numeric(gamma_est["shape"]) + #beta <- as.numeric(gamma_est["rate"]) + #shape_mu = alpha / beta + #shape_var = alpha / beta^2 + shape_mu <- mean(draws_shape) + a_mu <- mean(draws_a) + var <- 1 + return(list(shape_mu=shape_mu, a_mu=a_mu, var=var, draws_shape=draws_shape, draws_a=draws_a)) +} + +fixed_args_gr <- list(nobs = nobs, K = 15, nsims = nsims, dist_types=list(shape="normal", a="normal")) +prior_dap <- list(shape_mu = c(), a_mu = c(), dap_shape_mu = c(), dap_a_mu = c(), dap_var = c(), shape_mu_loss = c(), a_mu_loss = c(), var_loss = c()) +for (shape_mu in seq(10, 100, length.out = 10)){ + for(a_mu in seq(1, 5, length.out = 10)){ + prior_dap$shape_mu <- c(prior_dap$shape_mu, shape_mu) + prior_dap$a_mu <- 
c(prior_dap$a_mu, a_mu) + dap <- calculate_dap(shape_mu, a_mu, calib_generator, backend_gr_hmc, fixed_args_gr) + prior_dap$dap_shape_mu <- c(prior_dap$dap_shape_mu, dap$shape_mu) + prior_dap$dap_a_mu <- c(prior_dap$dap_a_mu, dap$a_mu) + prior_dap$dap_var <- c(prior_dap$dap_var, dap$var) + prior_dap$shape_mu_loss <- c(prior_dap$shape_mu_loss, shape_mu - dap$shape_mu) + prior_dap$a_mu_loss <- c(prior_dap$a_mu_loss, a_mu - dap$a_mu) + prior_dap$var_loss <- c(prior_dap$var_loss, 1 - dap$var) + } +} + + +shape_mu_loss_mat <- array(rep(NA, 10), dim=c(10, 10)) +rownames(shape_mu_loss_mat) <- seq(10, 100, length.out = 10) +colnames(shape_mu_loss_mat) <- seq(1, 5, length.out = 10) +for(i in 1:10){ + for(j in 1:10){ + shape_mu_loss_mat[i, j] <- prior_dap$shape_mu_loss[10 * (i - 1) + j]^2 + prior_dap$a_mu_loss[10 * (i - 1) + j]^2 + shape_mu_loss_mat[i, j] <- shape_mu_loss_mat[i, j] + (prior_dap$dap_shape_mu[10 * (i - 1) + j] - 50)^2 + (prior_dap$dap_a_mu[10 * (i - 1) + j] - 3)^2 + } +} +library(plot3D) +persp3D(x=seq(10, 100, length.out = 10), y=seq(1, 5, length.out = 10), z = shape_mu_loss_mat, theta=-30, phi=50, xlab="shape_mu", ylab="a_mu", zlab="convex") +``` diff --git a/vignettes/DAP_gamma_optimization.Rmd b/vignettes/DAP_gamma_optimization.Rmd new file mode 100644 index 0000000..b798c88 --- /dev/null +++ b/vignettes/DAP_gamma_optimization.Rmd @@ -0,0 +1,527 @@ +--- +title: "self-calibration-adaptive" +author: "Hyunji Moon, Shinyoung Kim" +output: + html_document: default + pdf_document: default +--- +```{r setup, include=FALSE, warning=FALSE} +library(SBC) +library(cmdstanr) +library(parallel) +library(bayesplot) +library(posterior) +library(dplyr) +library(rstan) +library(future) +library(ggpubr) +library(rstanarm) +library(ggplot2) +library(mclust) +library(plot3D) +options(mc.cores = parallel::detectCores()) +options(SBC.min_chunk_size = 5) +#set.seed(1984) +``` + + +```{R, warning=FALSE, error=FALSE} +## Generator settings +# number of SBC simulations per iteration (generator) +nsims <- 200 + +# number of observations +nobs <- 10 + +# link function (1 = logit, 2 = probit, 3 = cloglog) +link <- 1 + +# number of binomial trials per observation +nsize <- 10 + +## Backend settings +# number of draws per posterior approximation +ndraws <- 1000 + +# number of chains for hmc posterior approximation +nchains <- 2 +``` + +```{R} +generator_gr <- function(lambdas, fixed_args){ + # fixed value across simulated datasets + ## meta + nobs <- fixed_args$nobs + K <- fixed_args$K + dist_types <- fixed_args$dist_types + while(TRUE){ + # predictor + X <- array(rnorm(nobs * K, mean = 1, sd = 0.5), dim = c(nobs, K)) + b <- rnorm(K, mean = 0, sd = 1) + # generate + lambda_arg1 <- c() + lambda_arg2 <- c() + if(dist_types$shape == "normal"){ + shape <- rnorm(1, mean = lambdas$shape$mu, sd=lambdas$shape$sigma) + lambda_arg1 <- c(lambda_arg1, lambdas$shape$mu) + lambda_arg2 <- c(lambda_arg2, lambdas$shape$sigma) + } + else if(dist_types$shape == "gamma"){ + shape <- rgamma(1, shape = lambdas$shape$alpha, rate = lambdas$shape$beta) + lambda_arg1 <- c(lambda_arg1, lambdas$shape$alpha) + lambda_arg2 <- c(lambda_arg2, lambdas$shape$beta) + } + else if(dist_types$shape == "lognormal"){ + shape <- rlnorm(1, meanlog = lambdas$shape$mu, sdlog = lambdas$shape$sigma) + lambda_arg1 <- c(lambda_arg1, lambdas$shape$mu) + lambda_arg2 <- c(lambda_arg2, lambdas$shape$sigma) + } + + if(dist_types$a == "normal"){ + a <- rnorm(1, mean = lambdas$a$mu, sd=lambdas$a$sigma) + lambda_arg1 <- c(lambda_arg1, lambdas$a$mu) + lambda_arg2 <- 
c(lambda_arg2, lambdas$a$sigma) + } + #a <- rnorm(1, mean = 2, sd = 5) + logmu <- as.numeric(a + X %*% b) + mu <- exp(logmu) + Y <- rgamma(nobs, shape = shape, rate = shape / mu) + + if(!any(Y <= 1e-32)){ + return(list( + parameters = list(shape = shape), + generated = list(nobs= nobs, K = K, X = X, dist_types = match(unlist(dist_types), c("normal", "gamma", "lognormal")), + lambda_arg1 = lambda_arg1, lambda_arg2 = lambda_arg2, Y = Y) + ) + ) + } + } +} + +fixed_args_gr <- list(nobs = nobs, K = 15, nsims = nsims, dist_types=list(shape="lognormal", a="normal")) +cmdstan_mod_gr <- cmdstanr::cmdstan_model("models/gamma-reg.stan") +rstan_mod_gr <- stan_model("models/gamma-reg.stan") +backend_gr_opt <- SBC_backend_rstan_optimizing(rstan_mod_gr, draws = ndraws) +backend_gr_hmc <- SBC_backend_cmdstan_sample(cmdstan_mod_gr, chains = nchains, iter_sampling = ndraws / nchains) + +# combines target hp values with other hyperparameter settings +calib_generator <- function(lambda_init_gamma, fixed_args){ + generate_datasets(SBC_generator_function(generator_gr, lambda_init_gamma, fixed_args), n_datasets = fixed_args_gr$nsims) +} + +calculate_dap <- function(mu, var, generator, datasets=NULL, backened, fixed_args){ + if(is.null(datasets)){ + lambda_init_gamma <- list( + shape = list(mu= mu, sigma = sqrt(var)), #list(alpha= mu^2 / var, beta= mu / var), + a = list(mu= 2, sigma = 1) + ) + datasets <- do.call(generator, list(lambda_init_gamma, fixed_args = fixed_args)) + } + + sbc_result <- compute_results(datasets, backened, thin_ranks = 1) + draws_eta <- c() + draws_Y <- c() + prior_thetas <- posterior::extract_variable(datasets$parameters, "shape") + var_theta_tilde_bar <- 0 + var_theta_tilde <- 0 + B <- 0 + V <- 0 + for(i in 1:nsims){ + draws_Y <- c(draws_Y, datasets$generated[[i]]$Y) + samples <- SBC_fit_to_draws_matrix(sbc_result$fits[[i]]) + etas <- posterior::extract_variable(samples, "shape") + draws_eta <- c(draws_eta, etas) + + var_theta_tilde_bar <- var_theta_tilde_bar + (mean(etas) - mean(prior_thetas))^2 + var_theta_tilde <- var_theta_tilde + sum((etas - mean(etas))^2) + + B <- B + mean(etas) + V <- V + sum((etas - mean(etas))^2) + } + var_theta_tilde_bar <- var_theta_tilde_bar / fixed_args$nsims + var_theta_tilde <- var_theta_tilde / (fixed_args$nsims * ndraws) + + B <- B / fixed_args$nsims - mean(prior_thetas) + V <- V / (fixed_args$nsims * ndraws) + + # assume normal for dap + mu <- mean(draws_eta) + var <- sd(draws_eta)^2 + + return(list(mu=mu, var=var, draws_eta=draws_eta, draws_Y=draws_Y, B=B, V=V, datasets=datasets)) +} +``` + +## + +```{r} +gridsize_mu <- 5 +gridsize_var <- 5 +mu_seq <- seq(0, 50, length.out = gridsize_mu) +var_seq <- seq(5, 25, length.out = gridsize_var) +grid_size <- length(mu_seq) * length(var_seq) + +squared_lambda_diff <- array(rep(NA, grid_size), dim = c(gridsize_mu, gridsize_var)) +rownames(squared_lambda_diff) <- mu_seq +colnames(squared_lambda_diff) <- var_seq + +lambda_diff <- array(rep(NA, grid_size), dim = c(gridsize_mu, gridsize_var)) +rownames(lambda_diff) <- mu_seq +colnames(lambda_diff) <- var_seq + +dap_lambda_mu <- array(rep(NA, grid_size), dim = c(gridsize_mu, gridsize_var)) +dap_lambda_var <- array(rep(NA, grid_size), dim = c(gridsize_mu, gridsize_var)) +rownames(dap_lambda_mu) <- mu_seq +rownames(dap_lambda_var) <- mu_seq +colnames(dap_lambda_mu) <- var_seq +colnames(dap_lambda_var) <- var_seq + +lambda_mu <- array(rep(NA, grid_size), dim = c(gridsize_mu, gridsize_var)) +lambda_var <- array(rep(NA, grid_size), dim = c(gridsize_mu, gridsize_var)) 
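+# The grid objects in this chunk will hold, for each (mu, var) prior cell:
+#   squared_lambda_diff    - Euclidean distance between the prior hyperparameters
+#                            and the recovered DAP summary statistics
+#   lambda_diff            - signed sum of the mu and var discrepancies
+#   dap_lambda_mu / _var   - DAP summary statistics recovered at that cell
+#   lambda_mu / lambda_var - the prior hyperparameters themselves (kept for plotting)
+#   B_ghat, V_ghat, B_g, V_g - bias and variance summaries returned by calculate_dap
+#   generated_Y            - simulated observations pooled over the nsims datasets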
+rownames(lambda_mu) <- mu_seq +rownames(lambda_var) <- mu_seq +colnames(lambda_mu) <- var_seq +colnames(lambda_var) <- var_seq + +var_theta_tilde_bar <- array(rep(NA, grid_size), dim = c(gridsize_mu, gridsize_var)) +var_theta_tilde <- array(rep(NA, grid_size), dim = c(gridsize_mu, gridsize_var)) +rownames(var_theta_tilde_bar) <- mu_seq +rownames(var_theta_tilde_bar) <- mu_seq +colnames(var_theta_tilde) <- var_seq +colnames(var_theta_tilde) <- var_seq + +B_ghat <- array(rep(NA, grid_size), dim = c(gridsize_mu, gridsize_var)) +V_ghat <- array(rep(NA, grid_size), dim = c(gridsize_mu, gridsize_var)) +rownames(B_ghat) <- mu_seq +rownames(V_ghat) <- mu_seq +colnames(B_ghat) <- var_seq +colnames(V_ghat) <- var_seq + +B_g <- array(rep(NA, grid_size), dim = c(gridsize_mu, gridsize_var)) +V_g <- array(rep(NA, grid_size), dim = c(gridsize_mu, gridsize_var)) +rownames(B_g) <- mu_seq +rownames(V_g) <- mu_seq +colnames(B_g) <- var_seq +colnames(V_g) <- var_seq + + + +theta_bar <- array(rep(NA, grid_size), dim = c(gridsize_mu, gridsize_var)) +rownames(theta_bar) <- mu_seq +rownames(theta_bar) <- mu_seq + +generated_Y <- array(rep(NA, nsims * nobs * grid_size), dim=c(nsims * nobs, gridsize_mu, gridsize_var) ) +dimnames(generated_Y)[[2]] <- mu_seq +dimnames(generated_Y)[[3]] <- var_seq +for(j in 1:length(var_seq)){ + for(i in 1:length(mu_seq)){ + dap <- calculate_dap(mu_seq[[i]],var_seq[[j]] , calib_generator, NULL, backend_gr_hmc, fixed_args_gr) + squared_lambda_diff[i,j] <- sqrt((mu_seq[[i]] - dap$mu)^2 + (var_seq[[j]] - dap$var)^2) + lambda_diff[i,j] <- (dap$mu - mu_seq[[i]]) + (dap$var - var_seq[[j]]) + dap_lambda_mu[i, j] <- dap$mu + dap_lambda_var[i, j] <- dap$var + lambda_mu[i, j] <- mu_seq[[i]] + lambda_var[i, j] <- var_seq[[j]] + generated_Y[, i, j] <- dap$draws_Y + B_ghat[i, j] <- dap$B + V_ghat[i, j] <- dap$V + + #dap_hmc <- calculate_dap(mu_seq[[i]],var_seq[[j]], calib_generator, dap$datasets, backend_gr_hmc, fixed_args_gr) + B_g[i, j] <- dap$B + V_g[i, j] <- dap$V + } +} +``` + +```{R} +scaleFUN <- function(x) sprintf("%.2f", x) +dap_df <- as.data.frame.table(dap_lambda_mu) +colnames(dap_df)[1] <- "mu" +colnames(dap_df)[2] <- "var" +dap_df[, "mu"] <- as.numeric(as.vector(dap_df[, "mu"])) # not using as.vector converts the factor indices +dap_df[, "var"] <- as.numeric(as.vector(dap_df[, "var"])) +colnames(dap_df)[3] <- "dap_mu" +dap_lambda_df <- as.data.frame.table(dap_lambda_var) +dap_df[, "dap_var"] <- as.numeric(dap_lambda_df$Freq) +ggplot(dap_df, mapping=aes(x="mu", y="var")) + geom_point(aes(x=mu, y=var), color = "red") + geom_point(aes(x=dap_mu, y = dap_var), color = "blue") + scale_y_continuous(labels=scaleFUN) + geom_segment(aes(x=mu, y=var, xend=dap_mu, yend=dap_var), size=0.2) + xlab("mu") + ylab("var") + ggtitle("gamma + hmc") + +``` + + + + +```{R} +persp3D(x=mu_seq, y=var_seq, z = squared_lambda_diff, theta=90, phi=10, xlab="mu", ylab="var", zlab="convex if ", ticks=5, ticktype="detailed") + +``` +```{R} +persp3D(x=mu_seq, y=var_seq, z=lambda_diff, theta=90, phi=10, ticks=5, ticktype="detailed") +``` + +```{R} +B_df <- as.data.frame.table(B) +colnames(B_df)[colnames(B_df) == "Var1"] <- "Mu" +colnames(B_df)[colnames(B_df) == "Var2"] <- "Var" +ggplot(B_df) + geom_point(aes(x=Mu, y=Freq)) + geom_line(aes(x=Mu, y=Freq, group=1)) + ggtitle("B") +``` + +```{R} +V_df <- as.data.frame.table(V) +colnames(V_df)[colnames(V_df) == "Var1"] <- "Mu" +colnames(V_df)[colnames(V_df) == "Var2"] <- "Var" +ggplot(V_df) + geom_point(aes(x=Mu, y=Freq)) + geom_line(aes(x=Mu, y=Freq, group=1)) + 
ggtitle("V") +``` + +```{R} +for(j in 1:length(var_seq)){ + for(i in 1:length(mu_seq)){ + print(sprintf("mu: %f var: %f", lambda_mu[i, j], lambda_var[i, j])) + bias_term <- sum((theta_bar[i, j] - lambda_mu[i, j])^2) + var_term_1 <- sum(var_theta_tilde_bar[i, j]) + var_term_2 <- sum(var_theta_tilde[i, j]) + total <- bias_term + var_term_1 + var_term_2 + B <- var_theta_tilde_bar[i, j] - + print(sprintf("term1: %f term2: %f term3: %f total: %f", bias_term / total, var_term_1 / total , var_term_2 /total, total)) + } +} +``` + +Define objective and constraint functions + +```{R} +# maximize entropy +# partial derivative is: -log p_w - 1 +calc_entropy <- function(p){ + return(as.numeric((p + 1e-8) %*% -log(p + 1e-8)) - 1000 * (sum(p) - 1)^2) +} + +calc_entropy_grad <- function(p){ + return(-log(p + 1e-8) - 1 - 2000 * (p + 1e-8)) +} + +calc_mse <- function(p){ + mean((p *(dap_lambda_mu - lambda_mu))^2 + (p *(dap_lambda_var - lambda_var))^2) +} +calc_mse_grad <- function(p){ + n_p = length(p) + (2 * (p *(dap_lambda_mu - lambda_mu) * (dap_lambda_mu - lambda_mu)) + (2 * (p *(dap_lambda_var - lambda_var)) * (dap_lambda_var - lambda_var))) / n_p +} + +calc_mse2 <- function(p){ + n <- length(p) + p <- array(p, dim=c(1, n)) + + lambda_mu_vec <- array(lambda_mu, dim=c(1, n)) + theta_bar_vec <- array(theta_bar, dim=c(1, n)) + var_theta_tilde_bar_vec <- array(var_theta_tilde_bar, dim=c(1, n)) + var_theta_tilde_vec <- array(var_theta_tilde, dim=c(1, n)) + + bias_term <- sum(p * (theta_bar_vec - lambda_mu_vec)^2) + var_term <- sum(p * var_theta_tilde_bar_vec) + sum(p * var_theta_tilde_vec) + + return(bias_term + var_term) +} + +calc_mse2_grad <- function(p){ + n <- length(p) + p <- array(p, dim=c(1, n)) # dim(1, n) + + lambda_mu_vec <- array(lambda_mu, dim=c(1, n)) + theta_bar_vec <- array(theta_bar, dim=c(1, n)) + var_theta_tilde_bar_vec <- array(var_theta_tilde_bar, dim=c(1, n)) + var_theta_tilde_vec <- array(var_theta_tilde, dim=c(1, n)) + + + bias_grad <- (theta_bar_vec - lambda_mu_vec)^2 / n + var_grad <- var_theta_tilde_bar_vec / n + var_theta_tilde_vec + + return(bias_grad + var_grad) + + # grad backup (bias^2 + var) + #2 * sum(p2 * diff) * diff + 2 * (t_lambda - sum(p2 * t_lambda)) * -(p2 * t_lambda) + (t_lambda - sum(p2 * t_lambda))^2 +} + +# equal to minimize negative entropy +neg_entropy <- function(p){ + return(-calc_entropy(p)) +} + +neg_entropy_grad <- function(p){ + return(-calc_entropy_grad(p)) +} + +theta <- 14 +# partial derivative is squared_lambda_diff_{w} +p_v_product_constraint <- function(p){ + # equality constraint should equal 0 + return(as.numeric(p %*% as.vector(squared_lambda_diff)) - 5) +} + +p_v_product_constraint_grad <- function(p){ + return(as.vector(squared_lambda_diff)) +} + +probability_simplex_constraint <- function(p){ + return(sum(p) - 1) +} + +probability_simplex_constraint_grad <- function(p){ + return(rep(1, length(p))) +} + +# calculate theta and p_init + + +prob_ineq_constraints <- function(p){ + #constraints <- c(p_v_product_constraint(p), probability_simplex_constraint(p)) + #grad <- c(p_v_product_constraint_grad(p), probability_simplex_constraint_grad(p)) + constraints <- c(p_v_product_constraint(p)) + grad <- c(p_v_product_constraint_grad(p)) + return( list(constraints=constraints, jacobian=grad) ) +} + +prob_eq_constraints <- function(p){ + constraints <- c(probability_simplex_constraint(p)) + grad <- c(probability_simplex_constraint_grad(p)) + return( list(constraints=constraints, jacobian=grad) ) +} + +mse2_eq_constraints <- function(p){ + N <- length(p) + n 
<- N/2 + constraints <- c() + jacobian <- array(rep(NA * N * (n+2)), dim=c(n+2, N)) + + for(i in 1:n){ + constraints[i] <- p[i] - p[n + i] + grad_vec <- rep(0, N) + grad_vec[i] <- 1 + grad_vec[n + i] <- -1 + jacobian[i, ] <- grad_vec + } + constraints[length(constraints) + 1] <- sum(p[1:n]) - 1 + jacobian[i+1, ] <- c(rep(1, n), rep(0, n)) + + constraints[length(constraints) + 1] <- sum(p[(n+1):N]) - 1 + jacobian[i + 2, ] <- c(rep(0, n), rep(1, n)) + return( list(constraints=constraints, jacobian=jacobian) ) +} + +#If you want to use equality constraints, then you should use one of these algorithms NLOPT_LD_AUGLAG, NLOPT_LN_AUGLAG, NLOPT_LD_AUGLAG_EQ, NLOPT_LN_AUGLAG_EQ, NLOPT_GN_ISRES, NLOPT_LD_SLSQP + + +# start at uniform +p_init <- as.numeric(c(rep(1e-8, grid_size - 1), 1)) + +prob_lb <- rep(0.0, length(p_init)) +prob_ub <- rep(1.0, length(p_init)) + +probability_simplex_opts <- list(algorithm="NLOPT_LD_SLSQP", print_level=1, xtol_rel=1e-8, maxeval=1000, check_derivatives=TRUE) +prob_opt <- nloptr::nloptr(p_init, + neg_entropy, + eval_grad_f = neg_entropy_grad, + eval_g_ineq = prob_ineq_constraints, + opts = probability_simplex_opts, + lb=prob_lb, ub=prob_ub) + + +local_opts <- list(algorithm="NLOPT_LD_LBFGS", xtol_rel = 1.0e-8) +penalty_opts <- list(algorithm="NLOPT_LD_AUGLAG_EQ", print_level=1, xtol_rel=1e-8, maxeval=200, check_derivatives=TRUE, local_opts=local_opts) +mse_opts <- nloptr::nloptr(rep(1 / length(p_init), length(p_init)), + calc_mse, + eval_grad_f = calc_mse_grad, + eval_g_eq = prob_eq_constraints, + opts = penalty_opts, + lb=prob_lb, ub=prob_ub) + + +local_opts <- list(algorithm="NLOPT_LD_LBFGS", xtol_rel = 1.0e-8) +penalty_opts <- list(algorithm="NLOPT_LD_AUGLAG_EQ", print_level=1, xtol_rel=1e-8, maxeval=100, check_derivatives=TRUE, local_opts=local_opts) +mse_opts2 <- nloptr::nloptr(rep(1 / length(p_init), length(p_init)), + calc_mse2, + eval_grad_f = calc_mse2_grad, + eval_g_eq = prob_eq_constraints, + opts = penalty_opts, + lb=prob_lb, ub=prob_ub) + +``` + +```{R} + +result_out <- array(prob_opt$solution, dim = c(gridsize_mu, gridsize_var)) +rownames(result_out) <- mu_seq +colnames(result_out) <- var_seq +persp3D(x=mu_seq, y=var_seq, z = result_out, theta=25, phi=10, xlab="mu", ylab="var", zlab="prob_opt_result", ticks=5, ticktype="detailed") +persp3D(x=mu_seq, y=var_seq, z = squared_lambda_diff, theta=25, phi=10, xlab="mu", ylab="var", zlab="convex if ", ticks=5, ticktype="detailed") +``` +```{R} +result_dot_out <- array(min_sum_opts$solution, dim = c(gridsize_mu, gridsize_var)) +rownames(result_dot_out) <- mu_seq +colnames(result_dot_out) <- var_seq +persp3D(x=mu_seq, y=var_seq, z = result_dot_out, theta=25, phi=10, xlab="mu", ylab="var", zlab="prob_opt_result", ticks=5, ticktype="detailed") +``` + + +```{R} +N = 10000 +sampled_probs <- sample(1:grid_size, N, prob=as.vector(result_out), replace=TRUE) +mu <- rep(mu_seq, length(var_seq)) +sigma <- rep(var_seq, each=length(mu_seq)) +combined <- data.frame(mu=mu, sigma=sigma) +thetas <- c() +for(i in 1:N){ + thetas[i] <- rnorm(1, combined[sampled_probs[i], "mu"], combined[sampled_probs[i], "sigma"]) +} +hist(thetas, probability=TRUE, breaks=30) +``` + + +```{R} +library(ggnewscale) +n_bins = gridsize +lambdas <- data.frame(mu=numeric(), var=numeric(), density_entropy=numeric()) +for(i in 1:gridsize){ + for(j in 1:gridsize){ + lambdas[nrow(lambdas) + 1,] = list(mu=rownames(result_out)[i], var=colnames(result_out)[j], density_entropy=result_out[i, j]) + } +} +aggregated <- aggregate(lambdas$density_entropy, 
by=list(mu=lambdas$mu, var=lambdas$var), FUN=sum) +colnames(aggregated)[3] <- "density_entropy" + +lambdas_min_sum <- data.frame(mu=numeric(), var=numeric(), density_point=numeric()) +for(i in 1:gridsize){ + for(j in 1:gridsize){ + lambdas_min_sum[nrow(lambdas_min_sum) + 1,] = list(mu=rownames(result_dot_out)[i], var=colnames(result_dot_out)[j], density_point=result_dot_out[i, j]) + } +} +aggregated_min_sum <- aggregate(lambdas_min_sum$density_point, by=list(mu=lambdas_min_sum$mu, var=lambdas_min_sum$var), FUN=sum) +colnames(aggregated_min_sum)[3] <- "density_point" + +squared_lambda_diff_df <- data.frame(mu=numeric(), var=numeric(), distance=numeric()) +for(i in 1:gridsize){ + for(j in 1:gridsize){ + squared_lambda_diff_df[nrow(squared_lambda_diff_df) + 1,] = list(mu=rownames(squared_lambda_diff)[i], var=colnames(squared_lambda_diff)[j], distance=squared_lambda_diff[i, j]) + } +} +aggregated_squared_lambda_diff <- aggregate(squared_lambda_diff_df$distance, by=list(mu=squared_lambda_diff_df$mu, var=squared_lambda_diff_df$var), FUN=sum) +colnames(aggregated_squared_lambda_diff)[3] <- "distance" +range01 <- function(x){(x-min(x))/(max(x)-min(x))} +aggregated_squared_lambda_diff$distance <- range01(aggregated_squared_lambda_diff$distance) + +aggregated$density_point <- aggregated_min_sum$density_point + +scaleFUN <- function(x) sprintf("%.2f", as.numeric(x)) +ggplot(mapping=aes(x, y)) + geom_tile(data=aggregated, mapping=aes(x=mu, y=var, alpha=density_entropy), fill="goldenrod1") + + new_scale("alpha") + geom_tile(data=aggregated, mapping=aes(x=mu, y=var, alpha=density_point), fill="dodgerblue3") + + new_scale("alpha") + geom_tile(data=aggregated_squared_lambda_diff, mapping=aes(x=mu, y=var, alpha=distance), fill = "brown1") + + xlab("mu") + ylab("sigma") + scale_x_discrete(labels=scaleFUN) + scale_y_discrete(labels=scaleFUN) + + +ggplot(mapping=aes(x, y)) + geom_tile(data=aggregated, mapping=aes(x=mu, y=var, alpha=density_point), fill="dodgerblue3") + + new_scale("alpha") + geom_tile(data=aggregated_squared_lambda_diff, mapping=aes(x=mu, y=var, alpha=distance), fill = "brown1") + + xlab("mu") + ylab("sigma") + scale_x_discrete(labels=scaleFUN) + scale_y_discrete(labels=scaleFUN) + +ggplot(mapping=aes(x, y)) + geom_tile(data=aggregated, mapping=aes(x=mu, y=var, alpha=density_entropy), fill="goldenrod1") + + new_scale("alpha") + geom_tile(data=aggregated_squared_lambda_diff, mapping=aes(x=mu, y=var, alpha=distance), fill = "brown1") + + xlab("mu") + ylab("sigma") + scale_x_discrete(labels=scaleFUN) + scale_y_discrete(labels=scaleFUN) +``` diff --git a/vignettes/DAP_self_calibration.Rmd b/vignettes/DAP_self_calibration.Rmd new file mode 100644 index 0000000..b339dd2 --- /dev/null +++ b/vignettes/DAP_self_calibration.Rmd @@ -0,0 +1,619 @@ +--- +title: "self calibration analysis and optimization" +author: "Hyunji Moon, Shinyoung Kim" +output: html_document +--- + +## Preface and Contents + +In this vignette, we explain the concept of self-consistency and its potential application for identifying miscalibrations in inference algorithms. We also introduce an iterative process for a finding a prior region with a minimal amount of computational pathologies under a given inference algorithm. + +We will give a basic explanation of self-consistency and demonstrate its usage scenario with some examples. + +## Introduction and Explanation + +[Simulation Based Calibration](https://arxiv.org/pdf/1804.06788.pdf)(SBC) is often used for detecting serious computational issues. 
The failure of a SBC test is regarded as a result of a computationally unfaithful inference algorithm. Typically the user determines whether it's a failure by analyzing a set of graphical plots, instead of a given criterion. +```{r, echo=FALSE, fig.cap="", out.width = '70%', fig.align='center', fig.cap = "Some analysis results of SBC, courtesy of M. Modrak"} +knitr::include_graphics(path.expand("~/git_repos/hyunjimoon/SBC/vignettes/rmarkdown_images/modrak_sbc_results.png")) +``` + +Like the image above, we can use graphical plots to get a rough idea of the pathological details; under vs over-dispersion, under vs overestimation, and so on. +In general, we inspect the rank plots for uniformity. This is normally done by viewing the plots holistically for obvious deviations, or through quantitative methods like the chi-square test or empirical coverage metrics. +--- + +We propose a new test metric, based on two concepts, the Data Averaged Posterior and the self-consistency metric. Our motivations is that by taking advantage of the multiple "generate and fit" iterations of SBC, it will give us additional information to construct a more interpretable metric of miscalibration. + +We first draw a sample $\theta$ from the prior. Then we draw some data given the drawn prior sample($\pi(y \ | \ \theta)$) with respect to the model's data generating process. And finally, through running inference with the simulated data we obtain some posterior distribution($\pi(\tilde{\theta} \ | \ y)$): + +$$ +\pi(\tilde{\theta}) = \int \pi(\tilde{\theta} \ |\ y) \pi(y \ | \ \theta) \pi(\theta) \mathrm{d}y \ \mathrm{d}\theta +$$ + +If we were to take the average of the expectation of the computed posterior, given a faithful inference algorithm, it should equal the expectation of the prior distribution. This is the concept of the Data Averaged Posterior(DAP), $\pi(\tilde{\theta})$. + +Since we realistically can't compute the exact integral, we normally repeat the "sample-twice-and-fit" process $M$ times. Let $Y \sim f(\theta)$ denote the data generating process, and $\tilde{\theta} \sim \hat{g}(Y, P_\lambda)$ denote the inference algorithm, given some parameterized prior $P_\lambda$. $P_\lambda$ can be parameterized in whichever way, but for simplicity we'll use $P_\lambda = N(\lambda_\mu, \lambda_{var})$. Let $L$ denote the number of posterior samples drawn per iteration. We can then rewrite the DAP computation as the following: + +$$ +\theta_m \sim P_\lambda, \ Y_m \sim f(\theta_m), \ \tilde{\theta}_m^l \sim \hat{g}(Y_m, P_\lambda) \\ +l = 1, 2, \ ... \ , L \\ +m = 1, 2, \ ... \ , M +$$ + +In other words, for each iteration $m$ we draw a prior sample, then simulate some data through the data generating process $f$, and finally generate $l$ number of posterior samples with the inference algorithm $\hat{g}$. + +And with the $l$ posterior samples, we're free to compute whatever summary statistic which matches that of the parameterized prior $P_\lambda$. Since we've used the normal prior parameterization, let's go ahead and calculate the posterior mean for each iteration: + +$$ +\hat{\theta}_m = \frac{1}{L} \sum^L_{l=1} \tilde{\theta}_m^l +$$ +Define the computational bias, $B$, given some inference algorithm $\hat{g}$ and a parameterized prior $\lambda$ as the following: + +$$ +B(\hat{g}, \lambda) := E[\hat{\theta} \ | \ \theta_{1,...,M}] - \bar{\theta} = \frac{1}{M} \sum_M (\hat{\theta}_m - \theta_m) +$$ +where $\bar{\theta}$ denotes the expectation of the draws from the prior, $P_{\lambda}$. 
+ +Also define the computational variance $V(\hat{g}, \lambda)$ in the same fashion: + +$$ +V(\hat{g}, \lambda) := \frac{1}{M} \sum_M (\hat{\theta}_m - \bar{\theta})^2 +$$ +This is probably familiar - it's straight from the bias-variance decomposition of the mean squared error. + +So to sum it up, we're still doing the repeated "simulate and fit" thing from SBC. But instead of calculating ranks, we calculate a summary statistic which can be directly compared against the prior. And then calculated the bias and variance which can give us some information on how the inference algorithm is screwing up with this particular prior. When combined with the existing SBC tools and plots, such as the chi-square test for rank uniformity, this gives us a powerful package to identify and diagnose various miscalibrations. + +## Demonstration + +Okay, so we explained self-consistency and how the bias-variance decomposition can be potentially used for better identifying miscalibration. In order to demonstrate, we're going to mess with a couple of models and find prior regions where the inference algorithm behaves well and poorly. We'll then investigate how these behaviors are represented by the bias and variance metrics. And finally, we'll demonstrate an iterative update scheme whose final aim is to transport initial prior to a more satisfactory region. + +--- + +Doing the typical library imports here. We'll be using an awesome package that helps us easily perform SBC. Check it out [here](https://github.com/hyunjimoon/SBC) :D +```{r setup, results = 'hide', warning=FALSE, message=FALSE} +library(SBC) +library(cmdstanr) +library(parallel) +library(bayesplot) +library(posterior) +library(dplyr) +library(rstan) +library(future) +library(ggpubr) +library(rstanarm) +library(ggplot2) +options(mc.cores = parallel::detectCores()) +plan(multisession) +options(SBC.min_chunk_size = 5) +``` + +We will be using the eightschools model for demonstration: + +$$ +Y_i \sim \mathrm{normal}(\theta_i, \sigma_i) \\ +\theta'_i \sim \mathrm{normal}(0, 1) \\ +\theta_i = \theta'_i \times \tau + \mu \\ +\mu \sim \mathrm{normal}(0, 5) \\ +\tau \sim \mathrm{normal}(0, 5) +$$ +The parameter of interest here will be $\tau$. In the original model, a prior distribution of $\mathrm{normal}(0, 5)$ has been set, but we'd like to investigate how other prior regions fare in terms of SBC. So we'll try a grid search on the prior hyperparameters with both HMC and optimization and see how the results differ. + +$$ +Y_i \sim \mathrm{normal}(\theta_i, \sigma_i) \\ +\theta'_i \sim \mathrm{normal}(0, 1) \\ +\theta_i = \theta'_i \times \tau + \mu \\ +\mu \sim \mathrm{normal}(0, 5) \\ +\tau \sim \mathrm{normal}(\lambda_{\mu}, \lambda_{\sigma^2}) +$$ +The hyperparameters for the distribution of $\mathrm{log}(\tau)$, $\lambda_{\mu}, \lambda_{\sigma^2}$, will be the subject of the grid search. + +--- +### Setup +We'll set the basic variables here. The number of SBC iterations($M$ in the previous notation) will be set to 100 with 1000 posterior samples($L$). 
We'll also be rolling with the standard 4 chain setup for HMC: + +```{r, cache = TRUE} +## Generator settings +# number of SBC simulations per iteration (generator) +nsims <- 100 + +# number of observations +nobs <- 10 + +## Backend settings +# number of draws per posterior approximation +ndraws <- 1000 + +# number of chains for hmc posterior approximation +nchains <- 4 + +fixed_args_eightschools <- list(J = 8, nsims = nsims, sigma = c(15, 10, 16, 11, 9, 11, 10, 18), nsims=nsims, dist_types=list(tau="normal")) +``` + +The `SBC` package allows us to define the prior and data generating process, $P_\lambda,f$, purely in R. This serves two purposes. First, we can verify that the model we have specified is consistent with our intentions i.e. contains no bugs. And we can freely simulate parameters and data without having to call Stan. + +The `generator` function is the prior and data generating process: + +```{r, cache = TRUE} +generator_eightschools_ncp <- function(lambdas, fixed_args){ + # fixed value across simulated datasets + nsims <- fixed_args$nsims + J <- fixed_args$J + sigma <- fixed_args$sigma + + lambda_mu <- lambdas$tau$mu + lambda_sigma <- sqrt(lambdas$tau$var) + + # Draw tau from the designated normal distribution + # lambda_sigma is positively bounded + tau <- abs(rnorm(1, lambda_mu, lambda_sigma)) + + # other parameters are drawn from the default prior + mu = rnorm(1, 0, 5) + + theta_trans <- rnorm(J, 0, 1) + + theta <- theta_trans * tau + mu + # draw y from simulated parameters + y <- rnorm(J, theta, sigma) + + list( + variables = list( + tau = tau + ), + generated = list( + J = J, + y = y, + sigma = sigma, + nsims = nsims, + lambda_mu = lambda_mu, + lambda_var = lambdas$tau$var + ) + ) +} +``` + +Note the code line ` tau <- rnorm(1, lambda_mu, lambda_sigma)`. Like exactly in the model specification, we are drawing $\mathrm{log}(\tau)$ given hyperparameters $\lambda_{\mu}, \lambda_{\sigma^2}$. + +Now we define the HMC and optimizing `SBC` backends, which represent $\hat{g}$. 
+ +```{R, warning=FALSE, message=FALSE, error=FALSE} +cmdstan_mod_eightschools <- cmdstanr::cmdstan_model("models/eightschools_ncp_posteriordb.stan") +backend_eightschools_hmc <- SBC_backend_cmdstan_sample(cmdstan_mod_eightschools, chains = 4, iter_sampling = ndraws / 4) + +rstan_mod_eightschools <- rstan::stan_model("models/eightschools_ncp_posteriordb.stan") +backend_eightschools_opt <- SBC_backend_rstan_optimizing(rstan_mod_eightschools, draws=ndraws) + +calib_generator <- function(lambdas, fixed_args){ + generate_datasets(SBC_generator_function(generator_eightschools_ncp, lambdas, fixed_args), n_datasets = fixed_args_eightschools$nsims) +} +``` + +Finally we define a function that computes the DAP and then calculate bias, variance metrics: + +```{r, cache = TRUE} +calculate_dap <- function(lambda_mu, lambda_var, generator, datasets=NULL, backened, fixed_args){ + if(is.null(datasets)){ + lambda_init_eightschools <- list( + tau = list(mu=lambda_mu, var=lambda_var) + ) + datasets <- do.call(generator, list(lambda_init_eightschools, fixed_args = fixed_args)) + } + sbc_result <- compute_results(datasets, backened, thin_ranks = 1) + draws_params <- c() + draws_Y <- c() + prior_thetas <- posterior::extract_variable(datasets$variables, "tau") + theta_bar <- mean(prior_thetas) + + B <- 0 + V <- 0 + for(i in 1:nsims){ + draws_Y <- c(draws_Y, datasets$generated[[i]]$Y) + samples <- SBC_fit_to_draws_matrix(sbc_result$fits[[i]]) + params <- posterior::extract_variable(samples, "tau") + draws_params <- c(draws_params, params) + + B <- B + mean(params) + V <- V + sum((params - mean(params))^2) + } + + B <- B / fixed_args$nsims - theta_bar + V <- V / (fixed_args$nsims * ndraws) + + mu <- mean(draws_params) + var <- sd(draws_params)^2 + + return(list(mu=mu, var=var, draws_params=draws_params, draws_Y=draws_Y, B=B, V=V, datasets=datasets)) +} +``` + +Once we have the model and DAP generation we can start the grid search. We create a grid of mean-variance values and run inference with HMC and optimization for each hyperparameter combination. 
Note that the same prior-data samples are being used for optimization and HMC: + +```{r include=FALSE, eval=TRUE} +mu_seq <- readRDS(file="DAP_self_calibration_mu_seq.rds") +var_seq <- readRDS(file="DAP_self_calibration_var_seq.rds") +grid_size <- length(mu_seq) * length(var_seq) + +dap_lambda_mu <- readRDS(file="DAP_self_calibration_dap_lambda_mu.rds") +dap_lambda_var <- readRDS(file="DAP_self_calibration_dap_lambda_var.rds") +lambda_mu <- readRDS(file="DAP_self_calibration_lambda_mu.rds") +lambda_var <- readRDS(file="DAP_self_calibration_lambda_var.rds") +B_optim <- readRDS(file="DAP_self_calibration_B_optim.rds") +V_optim <- readRDS(file="DAP_self_calibration_V_optim.rds") +B_hmc <- readRDS(file="DAP_self_calibration_B_hmc.rds") +V_hmc <- readRDS(file="DAP_self_calibration_V_hmc.rds") +dap_lambda_mu_hmc <- readRDS(file="DAP_self_calibration_dap_lambda_mu_hmc.rds") +dap_lambda_var_hmc <- readRDS(file="DAP_self_calibration_dap_lambda_var_hmc.rds") +``` + +```{r, cache=TRUE, warning=FALSE, message=FALSE, eval=FALSE} +gridsize_mu <- 5 +gridsize_var <- 5 +mu_seq <- c(0, 1, 2, 3, 4) +var_seq <- c(1, 2, 5, 7, 10) +grid_size <- length(mu_seq) * length(var_seq) + +dap_lambda_mu <- array(rep(NA, grid_size), dim = c(gridsize_mu, gridsize_var)) +dap_lambda_var <- array(rep(NA, grid_size), dim = c(gridsize_mu, gridsize_var)) +rownames(dap_lambda_mu) <- mu_seq +rownames(dap_lambda_var) <- mu_seq +colnames(dap_lambda_mu) <- var_seq +colnames(dap_lambda_var) <- var_seq + +lambda_mu <- array(rep(NA, grid_size), dim = c(gridsize_mu, gridsize_var)) +lambda_var <- array(rep(NA, grid_size), dim = c(gridsize_mu, gridsize_var)) +rownames(lambda_mu) <- mu_seq +rownames(lambda_var) <- mu_seq +colnames(lambda_mu) <- var_seq +colnames(lambda_var) <- var_seq + +B_optim <- array(rep(NA, grid_size), dim = c(gridsize_mu, gridsize_var)) +V_optim <- array(rep(NA, grid_size), dim = c(gridsize_mu, gridsize_var)) +rownames(B_optim) <- mu_seq +rownames(V_optim) <- mu_seq +colnames(B_optim) <- var_seq +colnames(V_optim) <- var_seq + +B_hmc <- array(rep(NA, grid_size), dim = c(gridsize_mu, gridsize_var)) +V_hmc <- array(rep(NA, grid_size), dim = c(gridsize_mu, gridsize_var)) +rownames(B_hmc) <- mu_seq +rownames(V_hmc) <- mu_seq +colnames(B_hmc) <- var_seq +colnames(V_hmc) <- var_seq + +dap_lambda_mu_hmc <- array(rep(NA, grid_size), dim = c(gridsize_mu, gridsize_var)) +dap_lambda_var_hmc <- array(rep(NA, grid_size), dim = c(gridsize_mu, gridsize_var)) +rownames(dap_lambda_mu_hmc) <- mu_seq +rownames(dap_lambda_var_hmc) <- mu_seq +colnames(dap_lambda_mu_hmc) <- var_seq +colnames(dap_lambda_var_hmc) <- var_seq + +for(j in 1:length(var_seq)){ + for(i in 1:length(mu_seq)){ + dap <- calculate_dap(mu_seq[[i]],var_seq[[j]] , calib_generator, NULL, backend_eightschools_opt, fixed_args_eightschools) + dap_lambda_mu[i, j] <- dap$mu + dap_lambda_var[i, j] <- dap$var + lambda_mu[i, j] <- mu_seq[[i]] + lambda_var[i, j] <- var_seq[[j]] + B_optim[i, j] <- dap$B + V_optim[i, j] <- dap$V + + dap_hmc <- calculate_dap(mu_seq[[i]],var_seq[[j]], calib_generator, dap$datasets, backend_eightschools_hmc, fixed_args_eightschools) + B_hmc[i, j] <- dap_hmc$B + V_hmc[i, j] <- dap_hmc$V + dap_lambda_mu_hmc[i, j] <- dap_hmc$mu + dap_lambda_var_hmc[i, j] <- dap_hmc$var + } +} +``` + +```{r, include=FALSE, eval=FALSE} +saveRDS(mu_seq, file="DAP_self_calibration_mu_seq.rds") +saveRDS(var_seq, file="DAP_self_calibration_var_seq.rds") + +saveRDS(dap_lambda_mu, file="DAP_self_calibration_dap_lambda_mu.rds") +saveRDS(dap_lambda_var, 
file="DAP_self_calibration_dap_lambda_var.rds") +saveRDS(lambda_mu, file="DAP_self_calibration_lambda_mu.rds") +saveRDS(lambda_var, file="DAP_self_calibration_lambda_var.rds") +saveRDS(B_optim, file="DAP_self_calibration_B_optim.rds") +saveRDS(V_optim, file="DAP_self_calibration_V_optim.rds") +saveRDS(B_hmc, file="DAP_self_calibration_B_hmc.rds") +saveRDS(V_hmc, file="DAP_self_calibration_V_hmc.rds") +saveRDS(dap_lambda_mu_hmc, file="DAP_self_calibration_dap_lambda_mu_hmc.rds") +saveRDS(dap_lambda_var_hmc, file="DAP_self_calibration_dap_lambda_var_hmc.rds") + +``` + +After waiting quite a bit for the computation to finish, we can compare the results between HMC and optimization. Let's first look at optimization's prior plot: + +```{r, cache=TRUE, fig.align='center', fig.cap="prior-posterior grid plot of optimization"} + +scaleFUN <- function(x) sprintf("%.2f", x) +dap_df <- as.data.frame.table(dap_lambda_mu) +colnames(dap_df)[1] <- "mu" +colnames(dap_df)[2] <- "var" +dap_df[, "mu"] <- as.numeric(as.vector(dap_df[, "mu"])) # not using as.vector converts the factor indices +dap_df[, "var"] <- as.numeric(as.vector(dap_df[, "var"])) +colnames(dap_df)[3] <- "dap_mu" +dap_lambda_df <- as.data.frame.table(dap_lambda_var) +dap_df[, "dap_var"] <- as.numeric(dap_lambda_df$Freq) +ggplot(dap_df, mapping=aes(x="mu", y="var")) + geom_point(aes(x=mu, y=var), color = "red") + geom_point(aes(x=dap_mu, y = dap_var), color = "blue") + scale_y_continuous(labels=scaleFUN) + geom_segment(aes(x=mu, y=var, xend=dap_mu, yend=dap_var), size=0.2, arrow = arrow(length = unit(0.3, "cm"))) + xlab("mean") + ylab("variance") + ggtitle("optimization") +``` + +The red points on the bottom right denote the initial hyperparameter values; variance plotted against mean. The blue points are the same statistics calculated against samples from the DAP. If a given prior was to be self-consistent, we should expect little discrepancy between the initial prior values and the DAP. We can see that some priors show very large variance values recovered from the DAP, which is assumed to be caused from the combination of an extreme prior and optimization not finding the optima. + +Let's zoom in to identify which prior combinations are being problematic: + +```{R, fig.align='center', fig.cap="zoomed prior-posterior grid plot of HMC"} +ggplot(dap_df, mapping=aes(x="mu", y="var")) + geom_point(aes(x=mu, y=var), color = "red") + geom_point(aes(x=dap_mu, y = dap_var), color = "blue") + scale_y_continuous(labels=scaleFUN) + geom_segment(aes(x=mu, y=var, xend=dap_mu, yend=dap_var), size=0.2, arrow = arrow(length = unit(0.3, "cm"))) + xlab("mean") + ylab("variance") + ggtitle("optimization") + coord_cartesian(ylim=c(0, 20), xlim = c(0, 5)) + scale_y_continuous(breaks=c(1:20)) +``` +For priors with mean value zero, DAP values couldn't be calculated, resulting in `Inf`. Other priors show the recovered DAP variance values to be spurious and extremely large. 
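+
+If we wanted to pin down these problematic cells programmatically rather than by eye, a minimal sketch (not part of the original analysis) could flag grid entries whose recovered DAP statistics are non-finite or implausibly large:
+
+```{r, eval=FALSE}
+# Illustrative only: mark grid cells with non-finite or extreme recovered DAP
+# statistics; the factor of 100 is an arbitrary cutoff for "implausibly large".
+suspicious <- !is.finite(dap_lambda_mu) | !is.finite(dap_lambda_var) |
+  dap_lambda_var > 100 * max(lambda_var)
+which(suspicious, arr.ind = TRUE)  # rows index mu_seq, columns index var_seq
+```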
+ + +Let's see if HMC fares better: + +```{r, cache=TRUE, fig.align='center', fig.cap="prior-posterior grid plot of HMC"} + +scaleFUN <- function(x) sprintf("%.2f", x) +dap_df <- as.data.frame.table(dap_lambda_mu_hmc) +colnames(dap_df)[1] <- "mu" +colnames(dap_df)[2] <- "var" +dap_df[, "mu"] <- as.numeric(as.vector(dap_df[, "mu"])) # not using as.vector converts the factor indices +dap_df[, "var"] <- as.numeric(as.vector(dap_df[, "var"])) +colnames(dap_df)[3] <- "dap_mu" +dap_lambda_df <- as.data.frame.table(dap_lambda_var_hmc) +dap_df[, "dap_var"] <- as.numeric(dap_lambda_df$Freq) +ggplot(dap_df, mapping=aes(x="mu", y="var")) + geom_point(aes(x=mu, y=var), color = "red") + geom_point(aes(x=dap_mu, y = dap_var), color = "blue") + scale_y_continuous(labels=scaleFUN, breaks=c(1:10)) + geom_segment(aes(x=mu, y=var, xend=dap_mu, yend=dap_var), size=0.2, arrow = arrow(length = unit(0.3, "cm"))) + xlab("mean") + ylab("variance") + ggtitle("HMC") +``` + +Although to a lesser degree, we can still observe some deviations from the prior. But overall, HMC performs better inference preserving self-calibration compared to optimization. The deviations aren't due to sampling errors but instead the posterior moving toward regions favoring the likelihood, since we're using `sigma` values from the original model's data, instead of simulated values. + +We'll now plot the bias and variance metrics of optimization and HMC together. Since HMC was relatively well calibrated as to optimization, we should observe its metrics being lower than that of optimization: + +```{r, cache=TRUE, fig.width = 14, fig.align='center'} + +B_optim_df <- as.data.frame.table(B_optim) +colnames(B_optim_df)[colnames(B_optim_df) == "Var1"] <- "Mu" +colnames(B_optim_df)[colnames(B_optim_df) == "Var2"] <- "Var" +colnames(B_optim_df)[colnames(B_optim_df) == "Freq"] <- "B" +B_optim_df[, "type"] = "optimization" + +B_hmc_df <- as.data.frame.table(B_hmc) +colnames(B_hmc_df)[colnames(B_hmc_df) == "Var1"] <- "Mu" +colnames(B_hmc_df)[colnames(B_hmc_df) == "Var2"] <- "Var" +colnames(B_hmc_df)[colnames(B_hmc_df) == "Freq"] <- "B" +B_hmc_df[, "type"] = "hmc" + +B_df = rbind(B_optim_df, B_hmc_df) +B_df[, "B"] <- abs(B_df[, "B"]) + +breaks <- c(-0.1, 0.01, 0.1, 1, 2, 4, 16) + +ggplot(na.omit(B_df)) + geom_tile(aes(Mu, Var, fill=B)) + coord_fixed() + facet_grid(cols=vars(type)) + scale_fill_gradient2(low = "blue", mid="white", high = "red", trans="log", breaks=breaks) + ggtitle("abs(B) plot for HMC and optimization (lower is better)") +``` +Note that bias values for optimization at mean = 0 was unable to be calculated, since optimization returned extreme results. + +We can observe that HMC possesses far smaller bias metric values, implying that HMC contains less bias within its inference results as to optimization. In addition, some prior regions, such as for cases where `Var <= 2` and `Mu >= 2` yields the lowest bias values, suggesting that these regions are preferable as to other regions for minimal bias. 
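+
+As a quick sanity check on this reading of the heatmap, the grid cell with the smallest absolute bias can also be located directly (a minimal sketch, not part of the original analysis):
+
+```{r, eval=FALSE}
+# Index of the grid cell with the smallest |B| under HMC; the row name is the
+# prior mean and the column name the prior variance of that cell.
+best <- which(abs(B_hmc) == min(abs(B_hmc), na.rm = TRUE), arr.ind = TRUE)
+list(mu = rownames(B_hmc)[best[1]], var = colnames(B_hmc)[best[2]], B = B_hmc[best])
+```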
+ +```{r, cache=TRUE, fig.width = 14, fig.align='center'} + +V_optim_df <- as.data.frame.table(V_optim) +colnames(V_optim_df)[colnames(V_optim_df) == "Var1"] <- "Mu" +colnames(V_optim_df)[colnames(V_optim_df) == "Var2"] <- "Var" +colnames(V_optim_df)[colnames(V_optim_df) == "Freq"] <- "V" +V_optim_df[, "type"] = "optimization" + +V_hmc_df <- as.data.frame.table(V_hmc) +colnames(V_hmc_df)[colnames(V_hmc_df) == "Var1"] <- "Mu" +colnames(V_hmc_df)[colnames(V_hmc_df) == "Var2"] <- "Var" +colnames(V_hmc_df)[colnames(V_hmc_df) == "Freq"] <- "V" +V_hmc_df[, "type"] = "hmc" + +V_df = rbind(V_optim_df, V_hmc_df) +V_df[, "V"] <- abs(V_df[, "V"]) + +ggplot(na.omit(V_df)) + geom_tile(aes(Mu, Var, fill=V)) + coord_fixed() + facet_grid(cols=vars(type)) + scale_fill_gradient2(low = "blue", mid="white", high = "red", trans="log") + ggtitle("abs(V) plot for HMC and optimization (lower is better)") +``` + +For the variance metric $V$, we can again see that HMC outperforms optimization, showing that the dispersion of recovered posterior values are far lower, suggesting a much lower level of error as to optimization. + +### Iteratively updating prior for well-calibration + +Up to now, we've worked with metrics and identified various regions where miscalibration occured, or on the other side, well-calibrated. Can we try and go the other direction? How can we use the prior-posterior discrepancy information to refine our prior? In this section, we explore a prior update algorithm which attempts to move from a badly calibrated region to a more well-calibrated area. + +The iterative update algorithm uses the metric attained from a calculated DAP to update the prior's hyperparameters. Recall the DAP equation: + +$$ +\pi(\tilde{\theta}) = \int \pi(\tilde{\theta} \ |\ y) \pi(y \ | \ \theta) \pi(\theta) \mathrm{d}y \ \mathrm{d}\theta +$$ + +We generate metric from the DAP $\pi(\tilde{\theta})$, which allows us to directly measure the discrepancy between it and the prior distribution's hyperparameters, $\pi(\theta) = P_\lambda$. The discrepency can be used to guide a local search along the prior space. The increment of the descent step can be varied, but we provide a few options: + +1. "Markov chain"-like: This method reuses the calculated summary statistics of the DAP as the prior hyperparameter values of the next iterations. It's the simplest option, but can be effective for simpler models. +2. Cubic weights: We increment/decrement the hyperparameters in cubic order so that as the discrepancy gets smaller, the stepsize also gets smaller. On the otherhand, large discrepency can result in exponential stepsizes which causes the hyperparameters to rapidly shrink towards the DAP summary statistic. It was found that this method was effective in speeding up calibration for priors with a relatively scale-invariant region, since the cubic nature may only allow subtle movements near the ambient region of the DAP statistic. + +We'll try to start from a "bad", miscalibrated prior region and try to iteratively refine the prior into a more favorable region. We'll be starting from $N(2, 10)$ with optimization. 
Let's generate a rank plot as a quick check to see how bad the region is: +```{R, warning=FALSE, error=FALSE} +lambda_init_eightschools <- list( + tau = list(mu=5, var=10) +) + +datasets_eightschools_new <- generate_datasets(SBC_generator_function(generator_eightschools_ncp, lambda_init_eightschools, fixed_args_eightschools), n_sims = fixed_args_eightschools$nsims) + +result_eightschools_opt_new <- compute_results(datasets_eightschools_new, backend_eightschools_opt, thin_ranks = 1) + +plot_rank_hist(result_eightschools_opt_new, variables=list("tau")) +``` + +We can see that the ranks are not uniform, and thus the starting point is not well calibrated. Let's try the iterative update: + +```{R, warning=FALSE, message=FALSE} +updator = "mc_update" + + +# tolerance +tol <- 0.01 + +# learning rate +gamma <- 1.5 # 0.5 for gradient update, 10 for normal_str_update + +sc_eightschools_opt <- self_calib_adaptive(calib_generator, backend_eightschools_opt, updator, c("tau"), lambda_init_eightschools, nsims, 15, gamma, tol, fixed_args = fixed_args_eightschools) +``` + +This algorithm continuously updates the prior with the previous iteration's posterior, which drives the posterior into a well-calibrated region. + +```{R, warning=FALSE, error=FALSE} +sc_eightschools_opt$t_df +``` + +The iterative update algorithm determined the following $\mu, \sigma^2$ prior to be better calibrated: + +```{r} +sc_eightschools_opt$lambda$var = sc_eightschools_opt$lambda$var ^ 2 +sc_eightschools_opt$lambda +``` + +Let's check the rank plots at the region: + + +```{R, warning=FALSE, error=FALSE} +datasets_eightschools_new <- generate_datasets(SBC_generator_function(generator_eightschools_ncp, sc_eightschools_opt$lambda, fixed_args_eightschools), n_sims = fixed_args_eightschools$nsims) + +result_eightschools_opt_new <- compute_results(datasets_eightschools_new, backend_eightschools_opt, thin_ranks = 1) + +plot_rank_hist(result_eightschools_opt_new, variables=list("tau")) +``` + +We can identify that the rank plot has been significantly improved, which suggests the refined prior may be considered for usage if self-calibration is desired. 
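+
+As a complement to the visual check, the chi-square test mentioned in the introduction can be applied to the ranks. The sketch below assumes the per-fit ranks are available in the `$stats` data frame of the results object, with `variable`, `rank` and `max_rank` columns; this may differ between versions of the package:
+
+```{r, eval=FALSE}
+# Illustrative chi-square uniformity check on the SBC ranks of tau.
+stats_tau <- dplyr::filter(result_eightschools_opt_new$stats, variable == "tau")
+n_bins <- 20
+breaks <- seq(0, max(stats_tau$max_rank) + 1, length.out = n_bins + 1)
+observed <- table(cut(stats_tau$rank, breaks = breaks, include.lowest = TRUE))
+chisq.test(observed)  # a small p-value indicates non-uniform ranks, i.e. miscalibration
+```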
+ +We'll try again with HMC this time: + + +```{R, warning=FALSE, message=FALSE} +updator = "mc_update" + + +# tolerance +tol <- 0.01 + +# learning rate +gamma <- 1.5 # 0.5 for gradient update, 10 for normal_str_update + +sc_eightschools_opt <- self_calib_adaptive(calib_generator, backend_eightschools_hmc, updator, c("tau"), lambda_init_eightschools, nsims, 10, gamma, tol, fixed_args = fixed_args_eightschools) + +datasets_eightschools_new <- generate_datasets(SBC_generator_function(generator_eightschools_ncp, sc_eightschools_opt$lambda, fixed_args_eightschools), n_sims = fixed_args_eightschools$nsims) + +result_eightschools_opt_new <- compute_results(datasets_eightschools_new, backend_eightschools_opt, thin_ranks = 1) + +plot_rank_hist(result_eightschools_opt_new, variables=list("tau")) +``` +## SBC on Binom + +```{R, warning=FALSE, message=FALSE} +nsims <- 200 + +# number of observations +nobs <- 10#2 + +# link function (1 = logit, 2 = probit, 3 = cloglog) +link <- 1 + +# number of binomial trials per observation +nsize <- 10 + +## Backend settings +# number of draws per posterior approximation +ndraws <- 100 + +# number of chains for hmc posterior approximation +nchains <- 2 + +generator_binom <- function(lambdas, fixed_args){ + # fixed value across simulated datasets + # experiment settings + nobs <- fixed_args$nobs + nsize <- fixed_args$nsize + dist_types <- fixed_args$dist_types + # modular settings + link_type <- fixed_args$link_type + + # generate + lambda_arg1 <- c() + lambda_arg2 <- c() + if(dist_types$eta == "normal"){ + eta <- rnorm(1, mean = lambdas$eta$mu, sd=lambdas$eta$sigma) + lambda_arg1 <- c(lambda_arg1, lambdas$eta$mu) + lambda_arg2 <- c(lambda_arg2, lambdas$eta$sigma) + } + else if(dist_types$eta == "gamma"){ + eta <- rgamma(1, shape = lambdas$eta$alpha, rate = lambdas$eta$beta) + lambda_arg1 <- c(lambda_arg1, lambdas$eta$alpha) + lambda_arg2 <- c(lambda_arg2, lambdas$eta$beta) + } + + + + mu <- invtf_param_vec(eta, link_type = link_type) + Y <- rbinom(nobs, size = nsize, prob = mu) + list( + parameters = list(eta = eta), + generated = list(nobs= nobs, nsize = nsize, link = link_type, + dist_types = match(unlist(dist_types), c("normal", "gamma")), lambda_arg1 = lambda_arg1, lambda_arg2 = lambda_arg2, + Y = Y) + ) +} + +fixed_args_binom <- list(nobs = nobs, nsize = nsize, link_type = 1, nsims = nsims, ndraws = ndraws, dist_types=list(eta="normal")) +``` + +```{R, warning=FALSE, error=FALSE} +## Generator settings +# number of SBC simulations per iteration (generator) + + +lambda_init_binom <- list( + eta = list(mu=100, sigma=100) +) +datasets_binom <- SBC::generate_datasets(SBC_generator_function(generator_binom, lambda_init_binom, fixed_args_binom), n_datasets = fixed_args_binom$nsims) + +# hyperparameter update algorithm +updator = "mc_update" + +# maximal number of SBC iterations +niter <- 100 + +# tolerance +tol <- 0.1 + +# learning rate +gamma <- 1.5 # 0.5 for gradient update, 10 for normal_str_update + +# step2: inferring posterior +rstan_binom_mod <- stan_model("models/binom-laplace.stan") +cmdstan_binom_mod <- cmdstanr::cmdstan_model("models/binom-laplace.stan") + +backend_binom_opt <- SBC_backend_rstan_optimizing(rstan_binom_mod, draws = ndraws) +#backend_binom_hmc <- SBC_backend_cmdstan_sample(cmdstan_binom_mod, chains = 4, iter_sampling = ndraws / 4) # thin = 10 + +# initial badly calibrated +#result_binom_opt <- compute_results(datasets_binom, backend_binom_opt, thin_ranks = 1) + +``` + +HMC on the otherhand couldn't converge to a well-calibrated region. 
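+
+Before discussing the calibrated $\lambda$ further, here is a minimal sketch of what the two update rules described earlier might look like. These helpers are purely illustrative and are not the internals of `self_calib_adaptive`; `lambda` is assumed to be a `list(mu = ..., var = ...)` and `dap` the corresponding output of `calculate_dap()`:
+
+```{r, eval=FALSE}
+mc_update_step <- function(lambda, dap) {
+  # "Markov chain"-like rule: reuse the DAP summary statistics directly
+  # as the next prior hyperparameters.
+  list(mu = dap$mu, var = dap$var)
+}
+
+cubic_update_step <- function(lambda, dap, gamma = 1.5) {
+  # Cubic rule: the step shrinks rapidly once the discrepancy is small and
+  # grows rapidly when it is large, pulling lambda towards the DAP statistics.
+  list(mu  = lambda$mu  + gamma * (dap$mu  - lambda$mu)^3,
+       var = max(lambda$var + gamma * (dap$var - lambda$var)^3, 1e-6))
+}
+```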
+
+## The role of the calibration algorithm and the interpretation of calibrated $\lambda$
+
+The calibration algorithm iteratively updates the prior, $\lambda$, in an attempt to move to a calibrated region. This means that its hyperparameter values, the mean and variance in the case of the normal distribution, are the targets of the update. We would also like to address whether such basic priors are sufficient for representing computationally calibrated priors.
+
+In the case of the sequential update algorithm, the goal is to allocate high probability mass to desirable regions and low mass elsewhere. The resulting calibrated prior only conveys the following information: the region centered on the resulting distribution was found to be (relatively) well calibrated.
+
+We believe that in most cases the calibrated prior will not be used directly; instead it will serve as a reference for determining the bounds of the calibrated region. Given this information, domain knowledge can then be elicited into an actual prior distribution that hopefully stays within the calibrated region.
+
+In conclusion, finding calibrated regions does not involve domain knowledge elicitation, and in most cases simple probability densities may be sufficient for identifying calibrated regions of the prior. The user may then use this range of values as a reference when defining priors that aim to be computationally calibrated.
+
+In addition, we have seen that the update algorithm struggles in highly sensitive and difficult prior regions; this method may not be preferable for models in which the influence of the prior trumps that of the likelihood.
+
+## Wrap-up
+
+We've explored the concept of self-consistency and, with prior-posterior grid plots, identified prior regions which are deemed to be miscalibrated. Furthermore, we've used two metrics, the bias and variance calibration metrics, to identify in detail how different prior regions behave with respect to an inference algorithm.
+
+Finally, we showed that by iteratively updating the prior we were able to move from a poorly calibrated region to a well-calibrated one. The hyperparameters of the prior were sequentially updated until relative calibration was reached, giving the user a reference point for a calibrated region to aid in prior construction.
diff --git a/vignettes/DAP_self_calibration_eightschools.Rmd b/vignettes/DAP_self_calibration_eightschools.Rmd
new file mode 100644
index 0000000..b339dd2
--- /dev/null
+++ b/vignettes/DAP_self_calibration_eightschools.Rmd
@@ -0,0 +1,619 @@
+---
+title: "self calibration analysis and optimization"
+author: "Hyunji Moon, Shinyoung Kim"
+output: html_document
+---
+
+## Preface and Contents
+
+In this vignette, we explain the concept of self-consistency and its potential application for identifying miscalibrations in inference algorithms. We also introduce an iterative process for a finding a prior region with a minimal amount of computational pathologies under a given inference algorithm.
+
+We will give a basic explanation of self-consistency and demonstrate its usage scenario with some examples.
+
+## Introduction and Explanation
+
+[Simulation Based Calibration](https://arxiv.org/pdf/1804.06788.pdf)(SBC) is often used for detecting serious computational issues. The failure of a SBC test is regarded as a result of a computationally unfaithful inference algorithm. Typically the user determines whether it's a failure by analyzing a set of graphical plots, instead of a given criterion.
+```{r, echo=FALSE, fig.cap="", out.width = '70%', fig.align='center', fig.cap = "Some analysis results of SBC, courtesy of M. Modrak"} +knitr::include_graphics(path.expand("~/git_repos/hyunjimoon/SBC/vignettes/rmarkdown_images/modrak_sbc_results.png")) +``` + +Like the image above, we can use graphical plots to get a rough idea of the pathological details; under vs over-dispersion, under vs overestimation, and so on. +In general, we inspect the rank plots for uniformity. This is normally done by viewing the plots holistically for obvious deviations, or through quantitative methods like the chi-square test or empirical coverage metrics. +--- + +We propose a new test metric, based on two concepts, the Data Averaged Posterior and the self-consistency metric. Our motivations is that by taking advantage of the multiple "generate and fit" iterations of SBC, it will give us additional information to construct a more interpretable metric of miscalibration. + +We first draw a sample $\theta$ from the prior. Then we draw some data given the drawn prior sample($\pi(y \ | \ \theta)$) with respect to the model's data generating process. And finally, through running inference with the simulated data we obtain some posterior distribution($\pi(\tilde{\theta} \ | \ y)$): + +$$ +\pi(\tilde{\theta}) = \int \pi(\tilde{\theta} \ |\ y) \pi(y \ | \ \theta) \pi(\theta) \mathrm{d}y \ \mathrm{d}\theta +$$ + +If we were to take the average of the expectation of the computed posterior, given a faithful inference algorithm, it should equal the expectation of the prior distribution. This is the concept of the Data Averaged Posterior(DAP), $\pi(\tilde{\theta})$. + +Since we realistically can't compute the exact integral, we normally repeat the "sample-twice-and-fit" process $M$ times. Let $Y \sim f(\theta)$ denote the data generating process, and $\tilde{\theta} \sim \hat{g}(Y, P_\lambda)$ denote the inference algorithm, given some parameterized prior $P_\lambda$. $P_\lambda$ can be parameterized in whichever way, but for simplicity we'll use $P_\lambda = N(\lambda_\mu, \lambda_{var})$. Let $L$ denote the number of posterior samples drawn per iteration. We can then rewrite the DAP computation as the following: + +$$ +\theta_m \sim P_\lambda, \ Y_m \sim f(\theta_m), \ \tilde{\theta}_m^l \sim \hat{g}(Y_m, P_\lambda) \\ +l = 1, 2, \ ... \ , L \\ +m = 1, 2, \ ... \ , M +$$ + +In other words, for each iteration $m$ we draw a prior sample, then simulate some data through the data generating process $f$, and finally generate $l$ number of posterior samples with the inference algorithm $\hat{g}$. + +And with the $l$ posterior samples, we're free to compute whatever summary statistic which matches that of the parameterized prior $P_\lambda$. Since we've used the normal prior parameterization, let's go ahead and calculate the posterior mean for each iteration: + +$$ +\hat{\theta}_m = \frac{1}{L} \sum^L_{l=1} \tilde{\theta}_m^l +$$ +Define the computational bias, $B$, given some inference algorithm $\hat{g}$ and a parameterized prior $\lambda$ as the following: + +$$ +B(\hat{g}, \lambda) := E[\hat{\theta} \ | \ \theta_{1,...,M}] - \bar{\theta} = \frac{1}{M} \sum_M (\hat{\theta}_m - \theta_m) +$$ +where $\bar{\theta}$ denotes the expectation of the draws from the prior, $P_{\lambda}$. 
+ +Also define the computational variance $V(\hat{g}, \lambda)$ in the same fashion: + +$$ +V(\hat{g}, \lambda) := \frac{1}{M} \sum_M (\hat{\theta}_m - \bar{\theta})^2 +$$ +This is probably familiar - it's straight from the bias-variance decomposition of the mean squared error. + +So to sum it up, we're still doing the repeated "simulate and fit" thing from SBC. But instead of calculating ranks, we calculate a summary statistic which can be directly compared against the prior. And then calculated the bias and variance which can give us some information on how the inference algorithm is screwing up with this particular prior. When combined with the existing SBC tools and plots, such as the chi-square test for rank uniformity, this gives us a powerful package to identify and diagnose various miscalibrations. + +## Demonstration + +Okay, so we explained self-consistency and how the bias-variance decomposition can be potentially used for better identifying miscalibration. In order to demonstrate, we're going to mess with a couple of models and find prior regions where the inference algorithm behaves well and poorly. We'll then investigate how these behaviors are represented by the bias and variance metrics. And finally, we'll demonstrate an iterative update scheme whose final aim is to transport initial prior to a more satisfactory region. + +--- + +Doing the typical library imports here. We'll be using an awesome package that helps us easily perform SBC. Check it out [here](https://github.com/hyunjimoon/SBC) :D +```{r setup, results = 'hide', warning=FALSE, message=FALSE} +library(SBC) +library(cmdstanr) +library(parallel) +library(bayesplot) +library(posterior) +library(dplyr) +library(rstan) +library(future) +library(ggpubr) +library(rstanarm) +library(ggplot2) +options(mc.cores = parallel::detectCores()) +plan(multisession) +options(SBC.min_chunk_size = 5) +``` + +We will be using the eightschools model for demonstration: + +$$ +Y_i \sim \mathrm{normal}(\theta_i, \sigma_i) \\ +\theta'_i \sim \mathrm{normal}(0, 1) \\ +\theta_i = \theta'_i \times \tau + \mu \\ +\mu \sim \mathrm{normal}(0, 5) \\ +\tau \sim \mathrm{normal}(0, 5) +$$ +The parameter of interest here will be $\tau$. In the original model, a prior distribution of $\mathrm{normal}(0, 5)$ has been set, but we'd like to investigate how other prior regions fare in terms of SBC. So we'll try a grid search on the prior hyperparameters with both HMC and optimization and see how the results differ. + +$$ +Y_i \sim \mathrm{normal}(\theta_i, \sigma_i) \\ +\theta'_i \sim \mathrm{normal}(0, 1) \\ +\theta_i = \theta'_i \times \tau + \mu \\ +\mu \sim \mathrm{normal}(0, 5) \\ +\tau \sim \mathrm{normal}(\lambda_{\mu}, \lambda_{\sigma^2}) +$$ +The hyperparameters for the distribution of $\mathrm{log}(\tau)$, $\lambda_{\mu}, \lambda_{\sigma^2}$, will be the subject of the grid search. + +--- +### Setup +We'll set the basic variables here. The number of SBC iterations($M$ in the previous notation) will be set to 100 with 1000 posterior samples($L$). 
We'll also be rolling with the standard 4 chain setup for HMC: + +```{r, cache = TRUE} +## Generator settings +# number of SBC simulations per iteration (generator) +nsims <- 100 + +# number of observations +nobs <- 10 + +## Backend settings +# number of draws per posterior approximation +ndraws <- 1000 + +# number of chains for hmc posterior approximation +nchains <- 4 + +fixed_args_eightschools <- list(J = 8, nsims = nsims, sigma = c(15, 10, 16, 11, 9, 11, 10, 18), nsims=nsims, dist_types=list(tau="normal")) +``` + +The `SBC` package allows us to define the prior and data generating process, $P_\lambda,f$, purely in R. This serves two purposes. First, we can verify that the model we have specified is consistent with our intentions i.e. contains no bugs. And we can freely simulate parameters and data without having to call Stan. + +The `generator` function is the prior and data generating process: + +```{r, cache = TRUE} +generator_eightschools_ncp <- function(lambdas, fixed_args){ + # fixed value across simulated datasets + nsims <- fixed_args$nsims + J <- fixed_args$J + sigma <- fixed_args$sigma + + lambda_mu <- lambdas$tau$mu + lambda_sigma <- sqrt(lambdas$tau$var) + + # Draw tau from the designated normal distribution + # lambda_sigma is positively bounded + tau <- abs(rnorm(1, lambda_mu, lambda_sigma)) + + # other parameters are drawn from the default prior + mu = rnorm(1, 0, 5) + + theta_trans <- rnorm(J, 0, 1) + + theta <- theta_trans * tau + mu + # draw y from simulated parameters + y <- rnorm(J, theta, sigma) + + list( + variables = list( + tau = tau + ), + generated = list( + J = J, + y = y, + sigma = sigma, + nsims = nsims, + lambda_mu = lambda_mu, + lambda_var = lambdas$tau$var + ) + ) +} +``` + +Note the code line ` tau <- rnorm(1, lambda_mu, lambda_sigma)`. Like exactly in the model specification, we are drawing $\mathrm{log}(\tau)$ given hyperparameters $\lambda_{\mu}, \lambda_{\sigma^2}$. + +Now we define the HMC and optimizing `SBC` backends, which represent $\hat{g}$. 
+
+```{R, warning=FALSE, message=FALSE, error=FALSE}
+cmdstan_mod_eightschools <- cmdstanr::cmdstan_model("models/eightschools_ncp_posteriordb.stan")
+backend_eightschools_hmc <- SBC_backend_cmdstan_sample(cmdstan_mod_eightschools, chains = 4, iter_sampling = ndraws / 4)
+
+rstan_mod_eightschools <- rstan::stan_model("models/eightschools_ncp_posteriordb.stan")
+backend_eightschools_opt <- SBC_backend_rstan_optimizing(rstan_mod_eightschools, draws=ndraws)
+
+calib_generator <- function(lambdas, fixed_args){
+  generate_datasets(SBC_generator_function(generator_eightschools_ncp, lambdas, fixed_args), n_datasets = fixed_args$nsims)
+}
+```
+
+Finally, we define a function that computes the DAP and then calculates the bias and variance metrics:
+
+```{r, cache = TRUE}
+calculate_dap <- function(lambda_mu, lambda_var, generator, datasets=NULL, backend, fixed_args){
+  if(is.null(datasets)){
+    lambda_init_eightschools <- list(
+      tau = list(mu=lambda_mu, var=lambda_var)
+    )
+    datasets <- do.call(generator, list(lambda_init_eightschools, fixed_args = fixed_args))
+  }
+  sbc_result <- compute_results(datasets, backend, thin_ranks = 1)
+  draws_params <- c()
+  draws_Y <- c()
+  prior_thetas <- posterior::extract_variable(datasets$variables, "tau")
+  theta_bar <- mean(prior_thetas)
+
+  B <- 0
+  V <- 0
+  for(i in 1:fixed_args$nsims){
+    draws_Y <- c(draws_Y, datasets$generated[[i]]$y)
+    samples <- SBC_fit_to_draws_matrix(sbc_result$fits[[i]])
+    params <- posterior::extract_variable(samples, "tau")
+    draws_params <- c(draws_params, params)
+
+    # accumulate per-fit posterior means (for the bias)
+    # and within-fit squared deviations (for the variance)
+    B <- B + mean(params)
+    V <- V + sum((params - mean(params))^2)
+  }
+
+  # B: average posterior mean minus the prior mean of tau
+  B <- B / fixed_args$nsims - theta_bar
+  # V: average within-fit variance of the posterior draws
+  V <- V / (fixed_args$nsims * ndraws)
+
+  mu <- mean(draws_params)
+  var <- sd(draws_params)^2
+
+  return(list(mu=mu, var=var, draws_params=draws_params, draws_Y=draws_Y, B=B, V=V, datasets=datasets))
+}
+```
+
+Once we have the model and the DAP generation in place, we can start the grid search. We create a grid of mean-variance values and run inference with HMC and optimization for each hyperparameter combination. 
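+
+To make the interface concrete, a single call for one hyperparameter pair could look like the following sketch (the values are illustrative only and the chunk is not evaluated; the full grid search follows below):
+
+```{r, eval=FALSE}
+# Illustrative single evaluation of one prior (hypothetical hyperparameter values)
+dap_single <- calculate_dap(
+  lambda_mu = 2, lambda_var = 5,
+  generator = calib_generator,
+  datasets = NULL,                    # NULL => simulate fresh datasets from this prior
+  backend = backend_eightschools_opt,
+  fixed_args = fixed_args_eightschools
+)
+dap_single$B  # bias of the posterior means relative to the prior mean of tau
+dap_single$V  # average within-fit posterior variance
+```
+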
Note that the same prior-data samples are being used for optimization and HMC: + +```{r include=FALSE, eval=TRUE} +mu_seq <- readRDS(file="DAP_self_calibration_mu_seq.rds") +var_seq <- readRDS(file="DAP_self_calibration_var_seq.rds") +grid_size <- length(mu_seq) * length(var_seq) + +dap_lambda_mu <- readRDS(file="DAP_self_calibration_dap_lambda_mu.rds") +dap_lambda_var <- readRDS(file="DAP_self_calibration_dap_lambda_var.rds") +lambda_mu <- readRDS(file="DAP_self_calibration_lambda_mu.rds") +lambda_var <- readRDS(file="DAP_self_calibration_lambda_var.rds") +B_optim <- readRDS(file="DAP_self_calibration_B_optim.rds") +V_optim <- readRDS(file="DAP_self_calibration_V_optim.rds") +B_hmc <- readRDS(file="DAP_self_calibration_B_hmc.rds") +V_hmc <- readRDS(file="DAP_self_calibration_V_hmc.rds") +dap_lambda_mu_hmc <- readRDS(file="DAP_self_calibration_dap_lambda_mu_hmc.rds") +dap_lambda_var_hmc <- readRDS(file="DAP_self_calibration_dap_lambda_var_hmc.rds") +``` + +```{r, cache=TRUE, warning=FALSE, message=FALSE, eval=FALSE} +gridsize_mu <- 5 +gridsize_var <- 5 +mu_seq <- c(0, 1, 2, 3, 4) +var_seq <- c(1, 2, 5, 7, 10) +grid_size <- length(mu_seq) * length(var_seq) + +dap_lambda_mu <- array(rep(NA, grid_size), dim = c(gridsize_mu, gridsize_var)) +dap_lambda_var <- array(rep(NA, grid_size), dim = c(gridsize_mu, gridsize_var)) +rownames(dap_lambda_mu) <- mu_seq +rownames(dap_lambda_var) <- mu_seq +colnames(dap_lambda_mu) <- var_seq +colnames(dap_lambda_var) <- var_seq + +lambda_mu <- array(rep(NA, grid_size), dim = c(gridsize_mu, gridsize_var)) +lambda_var <- array(rep(NA, grid_size), dim = c(gridsize_mu, gridsize_var)) +rownames(lambda_mu) <- mu_seq +rownames(lambda_var) <- mu_seq +colnames(lambda_mu) <- var_seq +colnames(lambda_var) <- var_seq + +B_optim <- array(rep(NA, grid_size), dim = c(gridsize_mu, gridsize_var)) +V_optim <- array(rep(NA, grid_size), dim = c(gridsize_mu, gridsize_var)) +rownames(B_optim) <- mu_seq +rownames(V_optim) <- mu_seq +colnames(B_optim) <- var_seq +colnames(V_optim) <- var_seq + +B_hmc <- array(rep(NA, grid_size), dim = c(gridsize_mu, gridsize_var)) +V_hmc <- array(rep(NA, grid_size), dim = c(gridsize_mu, gridsize_var)) +rownames(B_hmc) <- mu_seq +rownames(V_hmc) <- mu_seq +colnames(B_hmc) <- var_seq +colnames(V_hmc) <- var_seq + +dap_lambda_mu_hmc <- array(rep(NA, grid_size), dim = c(gridsize_mu, gridsize_var)) +dap_lambda_var_hmc <- array(rep(NA, grid_size), dim = c(gridsize_mu, gridsize_var)) +rownames(dap_lambda_mu_hmc) <- mu_seq +rownames(dap_lambda_var_hmc) <- mu_seq +colnames(dap_lambda_mu_hmc) <- var_seq +colnames(dap_lambda_var_hmc) <- var_seq + +for(j in 1:length(var_seq)){ + for(i in 1:length(mu_seq)){ + dap <- calculate_dap(mu_seq[[i]],var_seq[[j]] , calib_generator, NULL, backend_eightschools_opt, fixed_args_eightschools) + dap_lambda_mu[i, j] <- dap$mu + dap_lambda_var[i, j] <- dap$var + lambda_mu[i, j] <- mu_seq[[i]] + lambda_var[i, j] <- var_seq[[j]] + B_optim[i, j] <- dap$B + V_optim[i, j] <- dap$V + + dap_hmc <- calculate_dap(mu_seq[[i]],var_seq[[j]], calib_generator, dap$datasets, backend_eightschools_hmc, fixed_args_eightschools) + B_hmc[i, j] <- dap_hmc$B + V_hmc[i, j] <- dap_hmc$V + dap_lambda_mu_hmc[i, j] <- dap_hmc$mu + dap_lambda_var_hmc[i, j] <- dap_hmc$var + } +} +``` + +```{r, include=FALSE, eval=FALSE} +saveRDS(mu_seq, file="DAP_self_calibration_mu_seq.rds") +saveRDS(var_seq, file="DAP_self_calibration_var_seq.rds") + +saveRDS(dap_lambda_mu, file="DAP_self_calibration_dap_lambda_mu.rds") +saveRDS(dap_lambda_var, 
file="DAP_self_calibration_dap_lambda_var.rds") +saveRDS(lambda_mu, file="DAP_self_calibration_lambda_mu.rds") +saveRDS(lambda_var, file="DAP_self_calibration_lambda_var.rds") +saveRDS(B_optim, file="DAP_self_calibration_B_optim.rds") +saveRDS(V_optim, file="DAP_self_calibration_V_optim.rds") +saveRDS(B_hmc, file="DAP_self_calibration_B_hmc.rds") +saveRDS(V_hmc, file="DAP_self_calibration_V_hmc.rds") +saveRDS(dap_lambda_mu_hmc, file="DAP_self_calibration_dap_lambda_mu_hmc.rds") +saveRDS(dap_lambda_var_hmc, file="DAP_self_calibration_dap_lambda_var_hmc.rds") + +``` + +After waiting quite a bit for the computation to finish, we can compare the results between HMC and optimization. Let's first look at optimization's prior plot: + +```{r, cache=TRUE, fig.align='center', fig.cap="prior-posterior grid plot of optimization"} + +scaleFUN <- function(x) sprintf("%.2f", x) +dap_df <- as.data.frame.table(dap_lambda_mu) +colnames(dap_df)[1] <- "mu" +colnames(dap_df)[2] <- "var" +dap_df[, "mu"] <- as.numeric(as.vector(dap_df[, "mu"])) # not using as.vector converts the factor indices +dap_df[, "var"] <- as.numeric(as.vector(dap_df[, "var"])) +colnames(dap_df)[3] <- "dap_mu" +dap_lambda_df <- as.data.frame.table(dap_lambda_var) +dap_df[, "dap_var"] <- as.numeric(dap_lambda_df$Freq) +ggplot(dap_df, mapping=aes(x="mu", y="var")) + geom_point(aes(x=mu, y=var), color = "red") + geom_point(aes(x=dap_mu, y = dap_var), color = "blue") + scale_y_continuous(labels=scaleFUN) + geom_segment(aes(x=mu, y=var, xend=dap_mu, yend=dap_var), size=0.2, arrow = arrow(length = unit(0.3, "cm"))) + xlab("mean") + ylab("variance") + ggtitle("optimization") +``` + +The red points on the bottom right denote the initial hyperparameter values; variance plotted against mean. The blue points are the same statistics calculated against samples from the DAP. If a given prior was to be self-consistent, we should expect little discrepancy between the initial prior values and the DAP. We can see that some priors show very large variance values recovered from the DAP, which is assumed to be caused from the combination of an extreme prior and optimization not finding the optima. + +Let's zoom in to identify which prior combinations are being problematic: + +```{R, fig.align='center', fig.cap="zoomed prior-posterior grid plot of HMC"} +ggplot(dap_df, mapping=aes(x="mu", y="var")) + geom_point(aes(x=mu, y=var), color = "red") + geom_point(aes(x=dap_mu, y = dap_var), color = "blue") + scale_y_continuous(labels=scaleFUN) + geom_segment(aes(x=mu, y=var, xend=dap_mu, yend=dap_var), size=0.2, arrow = arrow(length = unit(0.3, "cm"))) + xlab("mean") + ylab("variance") + ggtitle("optimization") + coord_cartesian(ylim=c(0, 20), xlim = c(0, 5)) + scale_y_continuous(breaks=c(1:20)) +``` +For priors with mean value zero, DAP values couldn't be calculated, resulting in `Inf`. Other priors show the recovered DAP variance values to be spurious and extremely large. 
+
+
+Let's see if HMC fares better:
+
+```{r, cache=TRUE, fig.align='center', fig.cap="prior-posterior grid plot of HMC"}
+
+scaleFUN <- function(x) sprintf("%.2f", x)
+dap_df <- as.data.frame.table(dap_lambda_mu_hmc)
+colnames(dap_df)[1] <- "mu"
+colnames(dap_df)[2] <- "var"
+dap_df[, "mu"] <- as.numeric(as.vector(dap_df[, "mu"])) # without as.vector we would convert the factor indices instead of the values
+dap_df[, "var"] <- as.numeric(as.vector(dap_df[, "var"]))
+colnames(dap_df)[3] <- "dap_mu"
+dap_lambda_df <- as.data.frame.table(dap_lambda_var_hmc)
+dap_df[, "dap_var"] <- as.numeric(dap_lambda_df$Freq)
+ggplot(dap_df) + geom_point(aes(x=mu, y=var), color = "red") + geom_point(aes(x=dap_mu, y = dap_var), color = "blue") + scale_y_continuous(labels=scaleFUN, breaks=c(1:10)) + geom_segment(aes(x=mu, y=var, xend=dap_mu, yend=dap_var), size=0.2, arrow = arrow(length = unit(0.3, "cm"))) + xlab("mean") + ylab("variance") + ggtitle("HMC")
+```
+
+We can still observe some deviations from the prior, although to a lesser degree: overall, HMC preserves self-calibration much better than optimization. The deviations aren't due to sampling errors, but rather to the posterior moving toward regions favored by the likelihood, since we're using the `sigma` values from the original model's data instead of simulated values.
+
+We'll now plot the bias and variance metrics of optimization and HMC together. Since HMC was better calibrated than optimization, we should expect its metrics to be lower:
+
+```{r, cache=TRUE, fig.width = 14, fig.align='center'}
+
+B_optim_df <- as.data.frame.table(B_optim)
+colnames(B_optim_df)[colnames(B_optim_df) == "Var1"] <- "Mu"
+colnames(B_optim_df)[colnames(B_optim_df) == "Var2"] <- "Var"
+colnames(B_optim_df)[colnames(B_optim_df) == "Freq"] <- "B"
+B_optim_df[, "type"] = "optimization"
+
+B_hmc_df <- as.data.frame.table(B_hmc)
+colnames(B_hmc_df)[colnames(B_hmc_df) == "Var1"] <- "Mu"
+colnames(B_hmc_df)[colnames(B_hmc_df) == "Var2"] <- "Var"
+colnames(B_hmc_df)[colnames(B_hmc_df) == "Freq"] <- "B"
+B_hmc_df[, "type"] = "hmc"
+
+B_df = rbind(B_optim_df, B_hmc_df)
+B_df[, "B"] <- abs(B_df[, "B"])
+
+breaks <- c(0.01, 0.1, 1, 2, 4, 16)
+
+ggplot(na.omit(B_df)) + geom_tile(aes(Mu, Var, fill=B)) + coord_fixed() + facet_grid(cols=vars(type)) + scale_fill_gradient2(low = "blue", mid="white", high = "red", trans="log", breaks=breaks) + ggtitle("abs(B) plot for HMC and optimization (lower is better)")
+```
+Note that the bias values for optimization at mean = 0 could not be calculated, since optimization returned extreme results.
+
+We can observe that HMC has far smaller bias values, implying that its inference results contain less bias than those of optimization. In addition, some prior regions, such as `Var <= 2` combined with `Mu >= 2`, yield the lowest bias values, suggesting that these regions are preferable when minimal bias is desired. 
+
+We now repeat the same comparison for the variance metric $V$:
+
+```{r, cache=TRUE, fig.width = 14, fig.align='center'}
+
+V_optim_df <- as.data.frame.table(V_optim)
+colnames(V_optim_df)[colnames(V_optim_df) == "Var1"] <- "Mu"
+colnames(V_optim_df)[colnames(V_optim_df) == "Var2"] <- "Var"
+colnames(V_optim_df)[colnames(V_optim_df) == "Freq"] <- "V"
+V_optim_df[, "type"] = "optimization"
+
+V_hmc_df <- as.data.frame.table(V_hmc)
+colnames(V_hmc_df)[colnames(V_hmc_df) == "Var1"] <- "Mu"
+colnames(V_hmc_df)[colnames(V_hmc_df) == "Var2"] <- "Var"
+colnames(V_hmc_df)[colnames(V_hmc_df) == "Freq"] <- "V"
+V_hmc_df[, "type"] = "hmc"
+
+V_df = rbind(V_optim_df, V_hmc_df)
+V_df[, "V"] <- abs(V_df[, "V"])
+
+ggplot(na.omit(V_df)) + geom_tile(aes(Mu, Var, fill=V)) + coord_fixed() + facet_grid(cols=vars(type)) + scale_fill_gradient2(low = "blue", mid="white", high = "red", trans="log") + ggtitle("abs(V) plot for HMC and optimization (lower is better)")
+```
+
+For the variance metric $V$, we again see that HMC outperforms optimization: the dispersion of the recovered posterior values is far lower, suggesting a much lower level of error than optimization.
+
+### Iteratively updating the prior for well-calibration
+
+Up to now, we've worked with the metrics and identified regions that are miscalibrated as well as regions that are well calibrated. Can we also go the other direction? How can we use the prior-posterior discrepancy information to refine our prior? In this section, we explore a prior update algorithm which attempts to move from a badly calibrated region to a better calibrated one.
+
+The iterative update algorithm uses the metrics obtained from a calculated DAP to update the prior's hyperparameters. Recall the DAP equation:
+
+$$
+\pi(\tilde{\theta}) = \int \pi(\tilde{\theta} \ |\ y) \pi(y \ | \ \theta) \pi(\theta) \mathrm{d}y \ \mathrm{d}\theta
+$$
+
+We compute summary statistics from the DAP $\pi(\tilde{\theta})$, which allows us to directly measure the discrepancy between it and the prior distribution $\pi(\theta) = P_\lambda$. The discrepancy can then be used to guide a local search over the prior (hyperparameter) space. The size of the update step can be varied; we provide a few options:
+
+1. "Markov chain"-like: This method reuses the calculated summary statistics of the DAP as the prior hyperparameter values of the next iteration (a minimal sketch of one such step is shown below). It's the simplest option, but can be effective for simpler models.
+2. Cubic weights: We increment/decrement the hyperparameters in cubic order, so that as the discrepancy gets smaller, the step size also gets smaller. On the other hand, a large discrepancy results in large steps, which rapidly shrink the hyperparameters towards the DAP summary statistic. We found this method effective in speeding up calibration for priors in a relatively scale-invariant region, since the cubic scaling only allows subtle movements once the hyperparameters are close to the DAP statistic.
+
+We'll try to start from a "bad", miscalibrated prior region and iteratively refine the prior towards a more favorable region. We'll be starting from $\mathrm{normal}(5, 10)$ (mean 5, variance 10) with optimization. 
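+
+Before running the full procedure, here is a minimal sketch of a single "Markov chain"-like update step. This is an illustration only (not the package's internal implementation); it reuses the `calculate_dap` helper from above and is not evaluated here.
+
+```{r, eval=FALSE}
+# One illustrative "Markov chain"-like step: the DAP summary statistics simply
+# become the prior hyperparameters for the next iteration.
+mc_update_step <- function(lambda, generator, backend, fixed_args) {
+  dap <- calculate_dap(lambda$tau$mu, lambda$tau$var, generator,
+                       datasets = NULL, backend = backend, fixed_args = fixed_args)
+  list(tau = list(mu = dap$mu, var = dap$var))
+}
+```
+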
Let's generate a rank plot as a quick check to see how bad the region is:
+```{R, warning=FALSE, error=FALSE}
+lambda_init_eightschools <- list(
+  tau = list(mu=5, var=10)
+)
+
+datasets_eightschools_new <- generate_datasets(SBC_generator_function(generator_eightschools_ncp, lambda_init_eightschools, fixed_args_eightschools), n_sims = fixed_args_eightschools$nsims)
+
+result_eightschools_opt_new <- compute_results(datasets_eightschools_new, backend_eightschools_opt, thin_ranks = 1)
+
+plot_rank_hist(result_eightschools_opt_new, variables=list("tau"))
+```
+
+We can see that the ranks are not uniform, and thus the starting point is not well calibrated. Let's try the iterative update:
+
+```{R, warning=FALSE, message=FALSE}
+updator <- "mc_update"
+
+# tolerance
+tol <- 0.01
+
+# learning rate
+gamma <- 1.5 # 0.5 for gradient update, 10 for normal_str_update
+
+sc_eightschools_opt <- self_calib_adaptive(calib_generator, backend_eightschools_opt, updator, c("tau"), lambda_init_eightschools, nsims, 15, gamma, tol, fixed_args = fixed_args_eightschools)
+```
+
+This algorithm repeatedly replaces the prior hyperparameters with the previous iteration's DAP summary statistics, driving the prior towards a well-calibrated region.
+
+```{R, warning=FALSE, error=FALSE}
+sc_eightschools_opt$t_df
+```
+
+The iterative update algorithm determined the following $\mu$, $\sigma^2$ prior to be better calibrated:
+
+```{r}
+sc_eightschools_opt$lambda$var <- sc_eightschools_opt$lambda$var ^ 2
+sc_eightschools_opt$lambda
+```
+
+Let's check the rank plot in this region:
+
+```{R, warning=FALSE, error=FALSE}
+datasets_eightschools_new <- generate_datasets(SBC_generator_function(generator_eightschools_ncp, sc_eightschools_opt$lambda, fixed_args_eightschools), n_sims = fixed_args_eightschools$nsims)
+
+result_eightschools_opt_new <- compute_results(datasets_eightschools_new, backend_eightschools_opt, thin_ranks = 1)
+
+plot_rank_hist(result_eightschools_opt_new, variables=list("tau"))
+```
+
+The rank plot has improved significantly, which suggests the refined prior can be considered for use if self-calibration is desired. 
+
+We'll try again with HMC this time:
+
+```{R, warning=FALSE, message=FALSE}
+updator <- "mc_update"
+
+# tolerance
+tol <- 0.01
+
+# learning rate
+gamma <- 1.5 # 0.5 for gradient update, 10 for normal_str_update
+
+sc_eightschools_hmc <- self_calib_adaptive(calib_generator, backend_eightschools_hmc, updator, c("tau"), lambda_init_eightschools, nsims, 10, gamma, tol, fixed_args = fixed_args_eightschools)
+
+datasets_eightschools_new <- generate_datasets(SBC_generator_function(generator_eightschools_ncp, sc_eightschools_hmc$lambda, fixed_args_eightschools), n_sims = fixed_args_eightschools$nsims)
+
+result_eightschools_hmc_new <- compute_results(datasets_eightschools_new, backend_eightschools_hmc, thin_ranks = 1)
+
+plot_rank_hist(result_eightschools_hmc_new, variables=list("tau"))
+```
+## SBC on Binom
+
+```{R, warning=FALSE, message=FALSE}
+nsims <- 200
+
+# number of observations
+nobs <- 10
+
+# link function (1 = logit, 2 = probit, 3 = cloglog)
+link <- 1
+
+# number of binomial trials per observation
+nsize <- 10
+
+## Backend settings
+# number of draws per posterior approximation
+ndraws <- 100
+
+# number of chains for hmc posterior approximation
+nchains <- 2
+
+generator_binom <- function(lambdas, fixed_args){
+  # fixed values across simulated datasets
+  # experiment settings
+  nobs <- fixed_args$nobs
+  nsize <- fixed_args$nsize
+  dist_types <- fixed_args$dist_types
+  # modular settings
+  link_type <- fixed_args$link_type
+
+  # generate the linear predictor eta from the designated prior
+  lambda_arg1 <- c()
+  lambda_arg2 <- c()
+  if(dist_types$eta == "normal"){
+    eta <- rnorm(1, mean = lambdas$eta$mu, sd=lambdas$eta$sigma)
+    lambda_arg1 <- c(lambda_arg1, lambdas$eta$mu)
+    lambda_arg2 <- c(lambda_arg2, lambdas$eta$sigma)
+  } else if(dist_types$eta == "gamma"){
+    eta <- rgamma(1, shape = lambdas$eta$alpha, rate = lambdas$eta$beta)
+    lambda_arg1 <- c(lambda_arg1, lambdas$eta$alpha)
+    lambda_arg2 <- c(lambda_arg2, lambdas$eta$beta)
+  }
+
+  # map eta to a probability via the inverse link and simulate the data
+  mu <- invtf_param_vec(eta, link_type = link_type)
+  Y <- rbinom(nobs, size = nsize, prob = mu)
+  list(
+    variables = list(eta = eta),
+    generated = list(nobs = nobs, nsize = nsize, link = link_type,
+                     dist_types = match(unlist(dist_types), c("normal", "gamma")), lambda_arg1 = lambda_arg1, lambda_arg2 = lambda_arg2,
+                     Y = Y)
+  )
+}
+
+fixed_args_binom <- list(nobs = nobs, nsize = nsize, link_type = 1, nsims = nsims, ndraws = ndraws, dist_types=list(eta="normal"))
+```
+
+```{R, warning=FALSE, error=FALSE}
+## Generator settings
+# deliberately extreme (badly calibrated) initial prior
+lambda_init_binom <- list(
+  eta = list(mu=100, sigma=100)
+)
+datasets_binom <- SBC::generate_datasets(SBC_generator_function(generator_binom, lambda_init_binom, fixed_args_binom), n_datasets = fixed_args_binom$nsims)
+
+# hyperparameter update algorithm
+updator <- "mc_update"
+
+# maximal number of SBC iterations
+niter <- 100
+
+# tolerance
+tol <- 0.1
+
+# learning rate
+gamma <- 1.5 # 0.5 for gradient update, 10 for normal_str_update
+
+# step 2: inferring the posterior
+rstan_binom_mod <- stan_model("models/binom-laplace.stan")
+cmdstan_binom_mod <- cmdstanr::cmdstan_model("models/binom-laplace.stan")
+
+backend_binom_opt <- SBC_backend_rstan_optimizing(rstan_binom_mod, draws = ndraws)
+#backend_binom_hmc <- SBC_backend_cmdstan_sample(cmdstan_binom_mod, chains = 4, iter_sampling = ndraws / 4) # thin = 10
+
+# initial, badly calibrated fit
+#result_binom_opt <- compute_results(datasets_binom, backend_binom_opt, thin_ranks = 1)
+
+```
+
+HMC, on the other hand, couldn't converge to a well-calibrated region. 
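+
+For reference, the iterative update for this binomial setup can be invoked analogously to the eight schools case. The sketch below assumes a wrapper `calib_generator_binom` (not defined above) mirroring `calib_generator`, and is not evaluated here:
+
+```{r, eval=FALSE}
+# Hypothetical wrapper mirroring calib_generator above
+calib_generator_binom <- function(lambdas, fixed_args){
+  generate_datasets(SBC_generator_function(generator_binom, lambdas, fixed_args),
+                    n_datasets = fixed_args$nsims)
+}
+
+sc_binom_opt <- self_calib_adaptive(calib_generator_binom, backend_binom_opt, updator,
+                                    c("eta"), lambda_init_binom, nsims, niter, gamma, tol,
+                                    fixed_args = fixed_args_binom)
+sc_binom_opt$lambda
+```
+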
+
+## The role of the calibration algorithm and the interpretation of the calibrated $\lambda$
+
+The calibration algorithm iteratively updates the prior, $\lambda$, in an attempt to move to a calibrated region. This means that its hyperparameter values, the mean and variance in the case of the normal distribution, are the target of the update. We would like to address the question of whether such basic priors are sufficient to represent computationally calibrated priors.
+
+In the case of the sequential update algorithm, the goal is to allocate high probability mass to desirable regions and low mass elsewhere. The resulting calibrated prior only conveys the following information: the region centered on the resulting distribution was found to be (relatively) well calibrated.
+
+We believe that in most cases the calibrated prior will not be used directly; instead, it will serve as a reference for determining the bounds of the calibrated region. Given this information, domain knowledge can then be elicited into an actual prior distribution that hopefully respects the calibrated region.
+
+In conclusion, finding calibrated regions does not involve domain knowledge elicitation, and in most cases simple probability densities may be sufficient for identifying calibrated regions of the prior. The user may then use this range of values as a reference when defining priors aimed at computational calibration.
+
+In addition, we have seen that the update algorithm struggles in highly sensitive and difficult prior regions; this method may not be preferable for models in which the influence of the prior trumps that of the likelihood.
+
+## Wrap-up
+
+We've explored the concept of self-consistency and, using prior-posterior grid plots, identified prior regions that appear miscalibrated. Furthermore, we've used two metrics, the bias and variance calibration metrics, to identify in detail how different prior regions behave with respect to an inference algorithm.
+
+Finally, we showed that by iteratively updating the prior we were able to move from a poorly calibrated region to a well-calibrated one. The hyperparameters of the prior were sequentially updated until relative calibration was reached, giving the user a reference point for a calibrated region to aid in prior construction.
diff --git a/vignettes/basic_usage.Rmd b/vignettes/SBC.Rmd
similarity index 79%
rename from vignettes/basic_usage.Rmd
rename to vignettes/SBC.Rmd
index a1f9585..8ebc680 100644
--- a/vignettes/basic_usage.Rmd
+++ b/vignettes/SBC.Rmd
@@ -1,10 +1,10 @@
 ---
-title: "SBC Interface Introduction"
+title: "Getting Started with SBC"
 author: "Hyunji Moon, Martin Modrák, Shinyoung Kim"
 date: "`r Sys.Date()`"
 output: rmarkdown::html_vignette
 vignette: >
-  %\VignetteIndexEntry{SBC Interface Introduction}
+  %\VignetteIndexEntry{Getting Started with SBC}
  %\VignetteEngine{knitr::rmarkdown}
  %\VignetteEncoding{UTF-8}
 ---
@@ -14,19 +14,19 @@ vignette: >
 
 SBC stands for "simulation-based calibration" and it is a tool to validate
 statistical models and/or algorithms fitting those models. In SBC we are given a statistical model,
-a method to generate samples from the prior predictive distribution
+a method to generate draws from the prior predictive distribution
 (i.e. generate simulated datasets that match the model's priors + likelihood)
 and an algorithm that fits the model to data. 
-The rough sketch of SBC is that we simulate some datasets and then for each dataset: +The rough sketch of SBC is that we simulate some datasets and then for each simulated dataset: -1. Fit the model and obtain $S$ independent samples from the posterior. -2. For each parameter, take the rank of the simulated parameter value within the - posterior samples - - Where rank is defined as the number of samples < simulated value +1. Fit the model and obtain $D$ independent draws from the posterior. +2. For each variable of interest, take the rank of the simulated value within the + posterior draws + - Where rank is defined as the number of draws < simulated value It can be shown that if model matched the generator and algorithm works correctly, -then for each parameter, the ranks obtained in SBC should be uniformly distributed between $0$ and $S$. +then for each variable, the ranks obtained in SBC should be uniformly distributed between $0$ and $D$. This corresponds quite directly to claims like "the posterior 84% credible interval should contain the simulated value in 84% of simulations", the rank uniformity represents this claim for all interval widths at once. The theory of SBC is fully @@ -94,22 +94,32 @@ implementing the same thing twice is costly. But statistical models are usually pieces of code and the added cost of the second implementation (the generator) thus tends to very small. +## Naming + +To avoid confusion the package and the docs tries to consistently give the same meaning to the following potentially ambiguous words: + +- _variable_ All quantities of interest for SBC - this includes both parameters that are directly estimated by the model and quantities derived from those parameters. +- _draws_ are assumed to come from either a single realized posterior distribution of a fitted model or the prior distribution of the model. The number of draws ( `n_draws`) is the number of posterior draws produced by fitting the model. +- _simulation_ / _sim_ a set of simulated values for all variables and the accompanying generated data. I.e. the number of simulations (`n_sims`) is the number of times an individual model is fitted +- _fit_ represents the result of fitting a single simulation + + ## Overview of the Architecture ![Overview of the package structure](overview_wide.png) To perform SBC, one needs to first generate simulated datasets and then fit the -model to those datasets. The `SBC_datasets` object holds the simulated prior and data samples. +model to those simulations. The `SBC_datasets` object holds the simulated prior and data draws. `SBC_datasets` objects can be created directly by the user, but it is often easier to use one -of provided _Generator_ implementations that let you e.g. wrap a function that returns the parameters and simulated data for a single dataset or use a `brms` specification to generate -samples corresponding to a given `brms` model. +of provided _Generator_ implementations that let you e.g. wrap a function that returns the variables and observed data for a single simulation or use a `brms` specification to generate +draws corresponding to a given `brms` model. The other big part of the process is a _backend_. -The SBC package uses a backend object to actually fit the model to the simulated data and generate posterior samples. 
In short, backend bunches together the algorithm in which inference is ran (`cmdstanr`, `rstan`, `brms`, `jags`, etc.), the model, and additional platform-specific inference parameters which are necessary to run inference for the model-platform combination (e.g. number of iterations, initial values, ...). -In other words backend is a function that takes data as its only input and provides posterior samples. +The SBC package uses a backend object to actually fit the model to the simulated data and generate posterior draws. In short, backend bundles together the algorithm in which inference is run (`cmdstanr`, `rstan`, `brms`, `jags`, etc.), the model, and additional platform-specific inference parameters which are necessary to run inference for the model-platform combination (e.g. number of iterations, initial values, ...). +In other words backend is a function that takes data as its only input and provides posterior draws. -Once we have a backend and an `SBC_datasets` instance, we can call `compute_results` +Once we have a backend and an `SBC_datasets` instance, we can call `compute_SBC` to actually perform the SBC. The resulting object can then be passed to various plotting and summarising functions to let us easily learn if our model works as expected. @@ -121,12 +131,13 @@ setup and configure our environment. ```{r setup, message=FALSE,warning=FALSE, results="hide"} library(SBC); -use_cmdstanr <- TRUE # Set to false to use rstan instead +use_cmdstanr <- getOption("SBC.vignettes_cmdstanr", TRUE) # Set to false to use rstan instead if(use_cmdstanr) { library(cmdstanr) } else { library(rstan) + rstan_options(auto_write = TRUE) } options(mc.cores = parallel::detectCores()) @@ -141,7 +152,11 @@ plan(multisession) options(SBC.min_chunk_size = 5) # Setup caching of results -cache_dir <- "./basic_usage_SBC_cache" +if(use_cmdstanr) { + cache_dir <- "./_basic_usage_SBC_cache" +} else { + cache_dir <- "./_basic_usage_rstan_SBC_cache" +} if(!dir.exists(cache_dir)) { dir.create(cache_dir) } @@ -167,16 +182,16 @@ if(use_cmdstanr) { ### Generator -Once we have defined the model, we can create a generator function which will generate prior and data samples: +Once we have defined the model, we can create a generator function which will generate prior and data draws: ```{r} -# A generator function should return a named list containing elements "parameters" and "generated" +# A generator function should return a named list containing elements "variables" and "generated" poisson_generator_single <- function(N){ # N is the number of data points we are generating lambda <- rgamma(n = 1, shape = 15, rate = 5) y <- rpois(n = N, lambda = lambda) list( - parameters = list( + variables = list( lambda = lambda ), generated = list( @@ -187,20 +202,20 @@ poisson_generator_single <- function(N){ # N is the number of data points we ar } ``` -As you can see, the generator returns a named list containing random samples from the prior and generated data realized from the prior samples - the data are already in the format expected by Stan. +As you can see, the generator returns a named list containing random draws from the prior and generated data realized from the prior draws - the data are already in the format expected by Stan. -### Create `SBC_Datasets` from generator +### Create `SBC_datasets` from generator `SBC` provides helper functions `SBC_generator_function` and `generate_datasets` which takes a generator function and calls it repeatedly to create a valid `SBC_datasets` object. 
```{r} set.seed(54882235) -n_datasets <- 100 # Number of SBC iterations to run +n_sims <- 100 # Number of SBC iterations to run poisson_generator <- SBC_generator_function(poisson_generator_single, N = 40) poisson_dataset <- generate_datasets( poisson_generator, - n_datasets) + n_sims) ``` @@ -224,10 +239,10 @@ if(use_cmdstanr) { ### Computing Ranks -we can then use `compute_results` to fit our datasets with the backend: +we can then use `compute_SBC` to fit our simulations with the backend: ```{r, results=FALSE} -results <- compute_results(poisson_dataset, poisson_backend, +results <- compute_SBC(poisson_dataset, poisson_backend, cache_mode = "results", cache_location = file.path(cache_dir, "results")) ``` @@ -245,7 +260,7 @@ results$stats ### Plots -And finally, we can plot the rank distribution to check if the ranks are uniformly distributed. We can check the rank histogram and ECDF plots: +And finally, we can plot the rank distribution to check if the ranks are uniformly distributed. We can check the rank histogram and ECDF plots (see `vignette("rank_visualizations")` for description of the plots): ```{r rank_hist} plot_rank_hist(results) @@ -303,3 +318,7 @@ practice in [model-building workflow](https://hyunjimoon.github.io/SBC/articles/ Alternatively, you might be interested in the [limits of SBC](https://hyunjimoon.github.io/SBC/articles/limits_of_SBC.html) --- the types of problems that are hard / impossible to catch with SBC and what can we do to guard against those. + +## Acknowledgements + +Development of this package was supported by [ELIXIR CZ](https://www.elixir-czech.cz/) research infrastructure project (Ministry of Youth, Education and Sports of the Czech Republic, Grant No: LM2018131) including access to computing and storage facilities. diff --git a/vignettes/bad_parametrization.Rmd b/vignettes/bad_parametrization.Rmd index aeac69d..76bd081 100644 --- a/vignettes/bad_parametrization.Rmd +++ b/vignettes/bad_parametrization.Rmd @@ -19,12 +19,13 @@ Let's setup the environment: ```{r setup, message=FALSE,warning=FALSE, results="hide"} library(SBC); -use_cmdstanr <- TRUE # Set to false to use rstan instead +use_cmdstanr <- getOption("SBC.vignettes_cmdstanr", TRUE) # Set to false to use rstan instead if(use_cmdstanr) { library(cmdstanr) } else { library(rstan) + rstan_options(auto_write = TRUE) } options(mc.cores = parallel::detectCores()) @@ -38,7 +39,11 @@ options(mc.cores = parallel::detectCores()) # Setup caching of results -cache_dir <- "./bad_parametrization_SBC_cache" +if(use_cmdstanr) { + cache_dir <- "./_bad_parametrization_SBC_cache" +} else { + cache_dir <- "./_bad_parametrization_rstan_SBC_cache" +} if(!dir.exists(cache_dir)) { dir.create(cache_dir) } @@ -76,16 +81,16 @@ Build a generator to create simulated datasets. 
```{r} set.seed(21448857) -n_datasets <- 10 +n_sims <- 10 -single_dataset_gamma <- function(N) { +single_sim_gamma <- function(N) { shape <- rlnorm(n = 1, meanlog = 0, sdlog = 1) scale <- rlnorm(n = 1, meanlog = 0, sdlog = 1.5) y <- rgamma(N, shape = shape, scale = scale) list( - parameters = list( + variables = list( shape = shape, scale = scale), generated = list( @@ -95,16 +100,16 @@ single_dataset_gamma <- function(N) { } -generator_gamma <- SBC_generator_function(single_dataset_gamma, N = 40) +generator_gamma <- SBC_generator_function(single_sim_gamma, N = 40) datasets_gamma <- generate_datasets( generator_gamma, - n_datasets) + n_sims) ``` ```{r} -results_gamma <- compute_results(datasets_gamma, backend_gamma, +results_gamma <- compute_SBC(datasets_gamma, backend_gamma, cache_mode = "results", cache_location = file.path(cache_dir, "model1")) ``` @@ -112,7 +117,7 @@ results_gamma <- compute_results(datasets_gamma, backend_gamma, Here we also use the caching feature to avoid recomputing the fits when recompiling this vignette. In practice, caching is not necessary but is often useful. -10 simulations are enough to see something is wrong with the model. The problem is best seen on an `ecdf_diff` plot - we even see the issue is primarily with the `scale` parameter! +10 simulations are enough to see something is wrong with the model. The problem is best seen on an `ecdf_diff` plot - we even see the issue is primarily with the `scale` variable! ```{r results1_ecdf_diff} plot_ecdf_diff(results_gamma) @@ -151,7 +156,7 @@ if(use_cmdstanr) { ```{r} -results_gamma2 <- compute_results(datasets_gamma, backend_gamma_2, +results_gamma2 <- compute_SBC(datasets_gamma, backend_gamma_2, cache_mode = "results", cache_location = file.path(cache_dir, "model2")) ``` diff --git a/vignettes/brms.Rmd b/vignettes/brms.Rmd index e2c5b1a..5ff8dfe 100644 --- a/vignettes/brms.Rmd +++ b/vignettes/brms.Rmd @@ -18,8 +18,15 @@ Let's setup the environment: library(SBC) library(brms) library(ggplot2) -options(brms.backend = "cmdstanr") -# options(brms.backend = "rstan") # Uncomment to use rstan instead + +use_cmdstanr <- getOption("SBC.vignettes_cmdstanr", TRUE) # Set to false to use rstan instead + +if(use_cmdstanr) { + options(brms.backend = "cmdstanr") +} else { + options(brms.backend = "rstan") + rstan::rstan_options(auto_write = TRUE) +} # Using parallel processing library(future) @@ -31,7 +38,11 @@ plan(multisession) options(SBC.min_chunk_size = 5) # Setup caching of results -cache_dir <- "./brms_SBC_cache" +if(use_cmdstanr) { + cache_dir <- "./_brms_SBC_cache" +} else { + cache_dir <- "./_brms_rstan_SBC_cache" +} if(!dir.exists(cache_dir)) { dir.create(cache_dir) } @@ -48,9 +59,9 @@ cannot be found as it will most likely affect the generator and the backend in t Still this can be useful for validating `brms` itself - we'll get to validation with custom generators in a while. For now, we'll build a generator using `brms` directly. -Generating datasets with this generator requires us to compile a Stan model +Generating simulations with this generator requires us to compile a Stan model and may thus take a while. Also the exploration is often problematic, so -to avoid problems, we take a lot of samples and thin the resulting samples heavily. +to avoid problems, we take a lot of draws and thin the resulting draws heavily. ```{r} # We need a "template dataset" to let brms build the model. 
@@ -82,10 +93,11 @@ generator) ```{r} backend <- SBC_backend_brms_from_generator(generator, chains = 1, thin = 1, - init = 0.1) + warmup = 500, iter = 1500, + inits = 0.1) # More verbose alternative that results in exactly the same backend: -# backend <- SBC_backend_brms(y ~ x, template_dataset = template_data, prior = priors, warmup = 500, iter = 1000, chains = 1, thin = 1 +# backend <- SBC_backend_brms(y ~ x, template_data = template_data, prior = priors, warmup = 500, iter = 1000, chains = 1, thin = 1 # init = 0.1) ``` @@ -93,7 +105,7 @@ backend <- SBC_backend_brms_from_generator(generator, chains = 1, thin = 1, Compute the actual results ```{r} -results <- compute_results(datasets, backend, thin_ranks = 10, +results <- compute_SBC(datasets, backend, cache_mode = "results", cache_location = file.path(cache_dir, "first")) ``` @@ -116,7 +128,7 @@ Let's take a bit more complex model - with a single varying intercept. This time we will not use the `brms` model to also simulate from prior, but simulate using an R function. This way, we get to learn if `brms` does what we think it does! -Custom generator code also allows us to have different covariate values for each dataset, potentially improving sensitivity +Custom generator code also allows us to have different covariate values for each simulation, potentially improving sensitivity if we want to check the model for a range of potential covariate values. If on the other hand we are interested in a specific dataset, it might make more sense to use the predictors as seen in the dataset in all simulations to focus @@ -130,7 +142,7 @@ need to be careful to match the parameter names as `brms` uses them. You can call `parnames` on a fit to see them. ```{r} -one_dataset_generator <- function(N, K) { +one_sim_generator <- function(N, K) { # N - number of datapoints, K number of groups for the varying intercept stopifnot(3 * K <= N) x <- rnorm(N) + 5 @@ -153,7 +165,7 @@ one_dataset_generator <- function(N, K) { y <- rnorm(N, predictor, sigma) list( - parameters = list( + variables = list( b_Intercept = b_Intercept, b_x = b_x, sd_group__Intercept = sd_group__Intercept, @@ -164,7 +176,7 @@ one_dataset_generator <- function(N, K) { ) } -n_dataset_generator <- SBC_generator_function(one_dataset_generator, N = 18, K = 5) +n_sims_generator <- SBC_generator_function(one_sim_generator, N = 18, K = 5) ``` For increased sensitivity, we also add the log likelihood of the data given parameters @@ -180,10 +192,10 @@ log_lik_gq_func <- generated_quantities( ```{r} set.seed(12239755) -datasets_func <- generate_datasets(n_dataset_generator, 100) +datasets_func <- generate_datasets(n_sims_generator, 100) ``` -This is then our `brms` backend - note that `brms` requires us to provide a sample +This is then our `brms` backend - note that `brms` requires us to provide a dataset that it will use to build the model (e.g. 
to see how many levels of various varying intercepts to include): @@ -196,13 +208,13 @@ priors_func <- prior(normal(0,1), class = "b") + backend_func <- SBC_backend_brms(y ~ x + (1 | group), prior = priors_func, chains = 1, - template_dataset = datasets_func$generated[[1]]) + template_data = datasets_func$generated[[1]]) ``` So we can happily compute: ```{r} -results_func <- compute_results(datasets_func, backend_func, thin_ranks = 10, +results_func <- compute_SBC(datasets_func, backend_func, gen_quants = log_lik_gq_func, cache_mode = "results", cache_location = file.path(cache_dir, "func")) @@ -217,12 +229,12 @@ plot_rank_hist(results_func) plot_ecdf_diff(results_func) ``` -It looks like there is a problem affecting at least the `b_Intercept` and `sigma` parameters. +It looks like there is a problem affecting at least the `b_Intercept` and `sigma` variables. We may also notice that the `log_lik` (log likelihood derived from all the parameters) is copying -the behaviour of the worst behaving parameter. This tends to be the case in many models, so in models with lots of parameters, it can be useful to add such a term as they make noticing problems easier. +the behaviour of the worst behaving variable. This tends to be the case in many models, so in models with lots of variables, it can be useful to add such a term as they make noticing problems easier. What happened is that `brms` by default centers all the predictors, which changes the -numerical values of the intercept (but not other terms). The interaction with the prior than probably also affects the other parameters. +numerical values of the intercept (but not other terms). The interaction with the prior than probably also affects the other variables. Maybe we don't want `brms` to do this --- using `0 + Intercept` syntax avoids the centering, so we build a new backend that should match our simulator better @@ -238,15 +250,15 @@ priors_func2 <- prior(normal(0,1), class = "b") + backend_func2 <- SBC_backend_brms(y ~ 0 + Intercept + x + (1 | group), prior = priors_func2, warmup = 1000, iter = 2000, chains = 1, - template_dataset = datasets_func$generated[[1]]) + template_data = datasets_func$generated[[1]]) ``` -Let's fit the same datasets with the new backend. +Let's fit the same simulations with the new backend. ```{r} -results_func2 <- compute_results(datasets_func, backend_func2, thin_ranks = 10, +results_func2 <- compute_SBC(datasets_func, backend_func2, gen_quants = log_lik_gq_func, cache_mode = "results", cache_location = file.path(cache_dir, "func2")) diff --git a/vignettes/computational_algorithm1.Rmd b/vignettes/computational_algorithm1.Rmd index 73d6d47..1654269 100644 --- a/vignettes/computational_algorithm1.Rmd +++ b/vignettes/computational_algorithm1.Rmd @@ -1,12 +1,12 @@ --- -title: "SBC for ADVI in Stan (+HMMs)" +title: "SBC for ADVI and optimizing in Stan (+HMMs)" author: "Hyunji Moon, Martin Modrák" date: "`r Sys.Date()`" output: rmarkdown::html_vignette: toc: yes vignette: > - %\VignetteIndexEntry{SBC for ADVI in Stan (+HMMs)} + %\VignetteIndexEntry{SBC for ADVI and optimizing in Stan (+HMMs)} %\VignetteEngine{knitr::rmarkdown} \usepackage[utf8]{inputenc} --- @@ -21,11 +21,11 @@ penalizing tendencies of convergence metric, and slow convergence of the optimization process. We'll discuss 3 examples: - In Example I a simple Poisson model is shown that is -well handled by default ADVI if the size of the data is small, but becomes miscalibrated with larger datasets. 
+well handled by default ADVI if the size of the data is small, but becomes miscalibrated when larger amount of observations is available. It also turns out that for such a simple model using `optimizing` leads to very good results. -- In Example II we discuss a Hidden Markov Model where the approximation by ADVI is imperfect but not very wrong. We also show how the (mis)calibration responds to changing parameters of the ADVI implementation. +- In Example II we discuss a Hidden Markov Model where the approximation by ADVI is imperfect but not very wrong. We also show how the (mis)calibration responds to changing parameters of the ADVI implementation and that `optimizing` performs worse than ADVI. -- In Example III we show that a small modification to the model from Example II makes the ADVI approximation almost useless. +- In Example III we show that a small modification to the model from Example II makes the ADVI approximation perform much worse. When the fit between posterior and approximation family, convergence metric and its process are checked so that efficiency is gained without sacrificing @@ -51,15 +51,13 @@ ever needing to run full HMC for your model. Let's start by setting up our environment. ```{r setup, message=FALSE,warning=FALSE, results="hide"} -library(SBC); +library(SBC) library(ggplot2) -use_cmdstanr <- TRUE # Set to false to use rstan instead +library(cmdstanr) + +library(rstan) +rstan_options(auto_write = TRUE) -if(use_cmdstanr) { - library(cmdstanr) -} else { - library(rstan) -} options(mc.cores = parallel::detectCores()) @@ -75,7 +73,7 @@ options(SBC.min_chunk_size = 5) # Setup caching of results -cache_dir <- "./approximate_computation_SBC_cache" +cache_dir <- "./_approximate_computation_SBC_cache" if(!dir.exists(cache_dir)) { dir.create(cache_dir) } @@ -100,7 +98,7 @@ poisson_generator_single <- function(N){ lambda <- rgamma(n = 1, shape = 15, rate = 5) y <- rpois(n = N, lambda = lambda) list( - parameters = list( + variables = list( lambda = lambda ), generated = list( @@ -132,9 +130,9 @@ Since the model runs quickly and is simple, we start with 1000 simulations. set.seed(46522641) ds_poisson <- generate_datasets( SBC_generator_function(poisson_generator_single, N = 20), - n_datasets = 1000) + n_sims = 1000) res_poisson <- - compute_results( + compute_SBC( ds_poisson, backend_poisson, keep_fits = FALSE, cache_mode = "results", cache_location = file.path(cache_dir, "poisson")) ``` @@ -152,6 +150,8 @@ To put this in different terms we can look at the observed coverage of central 5 empirical_coverage(res_poisson$stats,width = c(0.95, 0.8, 0.5)) ``` + + ## Is more data better? 
One would expect that the normal approximation implemented in ADVI becomes better with increased size of the data, this is however not necessarily true - let's run the same model, but increase `N` - the number of observed data points: @@ -160,10 +160,10 @@ One would expect that the normal approximation implemented in ADVI becomes bette set.seed(23546224) ds_poisson_100 <- generate_datasets( SBC_generator_function(poisson_generator_single, N = 100), - n_datasets = 1000) + n_sims = 1000) res_poisson_100 <- - compute_results(ds_poisson_100, backend_poisson, keep_fits = FALSE, - cache_mode = "none", cache_location = file.path(cache_dir, "poisson_100")) + compute_SBC(ds_poisson_100, backend_poisson, keep_fits = FALSE, + cache_mode = "results", cache_location = file.path(cache_dir, "poisson_100")) ``` In this case the model becomes clearly overconfident: @@ -179,9 +179,55 @@ The empirical coverage of the central intervals confirms this: empirical_coverage(res_poisson_100$stats,width = c(0.95, 0.8, 0.5)) ``` +## Optimizing + +If the model is so simple, maybe a simple Laplace approximation around +the posterior mode would suffice? We can use Stan's `optimizing` mode exactly +for that. Although unfortunately, this is currently implemented only in `rstan` +and not for `cmdstanr` (because the underlying CmdStan does not expose the +Hessian of the optimizing fit). + +So let us build an optimizing backend + +```{r} +model_poisson_rstan <- stan_model("stan/poisson.stan") +backend_poisson_optimizing <- SBC_backend_rstan_optimizing(model_poisson_rstan) +``` + +and use it to fit the same datasets - first to the one with `N = 20`. + +```{r} +res_poisson_optimizing <- + compute_SBC(ds_poisson, backend_poisson_optimizing, keep_fits = FALSE, + cache_mode = "results", cache_location = file.path(cache_dir, "poisson_opt")) +``` + +The resulting ECDF and rank plots are very good. + +```{r ecdf_rank_poisson_optimizing} +plot_ecdf_diff(res_poisson_optimizing) +plot_rank_hist(res_poisson_optimizing) +``` + +Similarly, we can fit the `N = 100` datasets. + +```{r} +res_poisson_optimizing_100 <- + compute_SBC(ds_poisson_100, backend_poisson_optimizing, keep_fits = FALSE, + cache_mode = "results", cache_location = file.path(cache_dir, "poisson_opt_100")) +``` + +The resulting rank plot once again indicates no serious issues and we thus get +better results here than with ADVI. + +```{r ecdf_rank_poisson_optimizing_100} +plot_ecdf_diff(res_poisson_optimizing_100) +plot_rank_hist(res_poisson_optimizing_100) +``` + ## Summary -We see that for simple models ADVI can provide very tight approximation to exact inference, but this cannot be taken for granted. Surprisingly, having more data does not make the ADVI approximation necessarily better. +We see that for simple models ADVI can provide very tight approximation to exact inference, but this cannot be taken for granted. Surprisingly, having more data does not make the ADVI approximation necessarily better. Additionally, for such simple models, a simple Laplace approximation around the posterior mode works better (and likely faster) than ADVI. 
# Example II - Hidden Markov Model @@ -240,7 +286,7 @@ generator_HMM <- function(N) { y <- rpois(N, mu[states]) list( - parameters = list( + variables = list( mu_background = mu_background, mu_signal = mu_signal, # rdirichlet returns matrices, convert to 1D vectors @@ -269,9 +315,13 @@ cat(readLines("stan/hmm_poisson.stan"), sep = "\n") We start with the default (meanfield) variational backend via Stan: ```{r} +if(package_version(cmdstanr::cmdstan_version()) < package_version("2.26.0") ) { + stop("The models int this section require CmdStan 2.26 or later.") +} model_HMM <- cmdstan_model("stan/hmm_poisson.stan") backend_HMM <- SBC_backend_cmdstan_variational(model_HMM, n_retries_init = 3) ``` + Since we are feeling confident that our model is implemented correctly (and the model runs quickly), we start with 100 simulations and assume 100 observations for each. If you are developing a new model, it might be useful to start with fewer simulations, as discussed in the [small model workflow vignette](https://hyunjimoon.github.io/SBC/articles/small_model_workflow.html). @@ -280,12 +330,12 @@ And we compute results ```{r} set.seed(642354822) -ds_hmm <- generate_datasets(SBC_generator_function(generator_HMM, N = 100), n_datasets = 100) -res_hmm <- compute_results(ds_hmm, backend_HMM, +ds_hmm <- generate_datasets(SBC_generator_function(generator_HMM, N = 100), n_sims = 100) +res_hmm <- compute_SBC(ds_hmm, backend_HMM, cache_mode = "results", cache_location = file.path(cache_dir, "hmm")) ``` -There are not huge problems, but the `mu_signal` parameter seems to not be well calibrated: +There are not huge problems, but the `mu_signal` variable seems to not be well calibrated: ```{r hmm_ecdf_ranks} plot_ecdf_diff(res_hmm) @@ -302,19 +352,19 @@ To make sure this is not a fluke we add 400 more simulations. ```{r} set.seed(2254355) -ds_hmm_2 <- generate_datasets(SBC_generator_function(generator_HMM, N = 100), n_datasets = 400) +ds_hmm_2 <- generate_datasets(SBC_generator_function(generator_HMM, N = 100), n_sims = 400) ``` ```{r} res_hmm_2 <- bind_results( res_hmm, - compute_results(ds_hmm_2,backend_HMM, + compute_SBC(ds_hmm_2,backend_HMM, cache_mode = "results", cache_location = file.path(cache_dir, "hmm2")) ) ``` -This confirms the problems with `mu_signal`. additionally, we see that `mu_background` and the `rho` parameters also show some irregularities. +This confirms the problems with `mu_signal`. additionally, we see that `mu_background` and the `rho` variables also show some irregularities. ```{r hmm_2_ecdf_ranks} plot_ecdf_diff(res_hmm_2) @@ -333,7 +383,7 @@ plot_coverage(res_hmm_2) This is what we get when we focus on the 90% posterior credible interval: ```{r} -coverage_hmm <- empirical_coverage(res_hmm_2$stats, width = 0.9)[, c("parameter", "ci_low", "ci_high")] +coverage_hmm <- empirical_coverage(res_hmm_2$stats, width = 0.9)[, c("variable", "ci_low", "ci_high")] coverage_hmm ``` @@ -355,7 +405,7 @@ We may try if the situation improves with full-rank ADVI - let's run it for the ```{r} ds_hmm_all <- bind_datasets(ds_hmm, ds_hmm_2) -res_hmm_fullrank <- compute_results( +res_hmm_fullrank <- compute_SBC( ds_hmm_all, SBC_backend_cmdstan_variational(model_HMM, algorithm = "fullrank", n_retries_init = 3), cache_mode = "results", cache_location = file.path(cache_dir, "hmm_fullrank")) @@ -371,7 +421,7 @@ plot_rank_hist(res_hmm_fullrank) Interestingly, the rank plot for `mu_signal` shows a "frowning" shape, meaning the mean-field approximation is slightly underconfident here. 
-This is nicely demonstrated by looking at the central interval coverage - now the coverage of `mu_signal` is _larger_ than it should be, so the model is underconfident (i.e. more conservative), while the coverages for other parameters track the nominal values quite closely. +This is nicely demonstrated by looking at the central interval coverage - now the coverage of `mu_signal` is _larger_ than it should be, so the model is underconfident (i.e. more conservative), while the coverages for other variables track the nominal values quite closely. ```{r hmm_fullrank_coverage} plot_coverage(res_hmm_fullrank) @@ -382,7 +432,7 @@ Or alternatively looking at the numerical values for coverage of the central 90% ```{r} coverage_hmm_fullrank <- - empirical_coverage(res_hmm_fullrank$stats, width = 0.9)[, c("parameter", "ci_low", "ci_high")] + empirical_coverage(res_hmm_fullrank$stats, width = 0.9)[, c("variable", "ci_low", "ci_high")] coverage_hmm_fullrank ``` @@ -398,7 +448,7 @@ optimization convergence. Here we'll use the default mean-field algorithm, but decrease the `tol_rel_obj` (the default value is 0.01). So let's try that. ```{r} -res_hmm_lowtol <- compute_results( +res_hmm_lowtol <- compute_SBC( ds_hmm_all, SBC_backend_cmdstan_variational(model_HMM, tol_rel_obj = 0.001, n_retries_init = 3), cache_mode = "results", cache_location = file.path(cache_dir, "hmm_lowtol")) @@ -433,19 +483,44 @@ plot_coverage(res_hmm_lowtol_conv) and the numerical values for the coverage of the central 90% interval. ```{r} -empirical_coverage(res_hmm_lowtol$stats, width = 0.9)[, c("parameter", "ci_low", "ci_high")] +empirical_coverage(res_hmm_lowtol$stats, width = 0.9)[, c("variable", "ci_low", "ci_high")] ``` This variant has somewhat lower overall mismatch, but tends to be overconfident, which might in some cases be less desirable than the more conservative fullrank. +## Optimizing + +Would optimizing provide sensible results in this case? We build an optimizng backend and run it. + +```{r} +SBC:::require_package_version("rstan", "2.26", "The models in the following sections need more recent rstan than what is available on CRAN - use https://mc-stan.org/r-packages/ to get it") + +model_HMM_rstan <- stan_model("stan/hmm_poisson.stan") + +res_hmm_optimizing <- compute_SBC( + ds_hmm_all, + SBC_backend_rstan_optimizing(model_HMM_rstan, n_retries_hessian = 3), + cache_mode = "results", cache_location = file.path(cache_dir, "hmm_optimizing")) +``` + + +We see that while for some variables (`mu_signal`, the transition probabilities `t[]`), the Laplace approximation is reasonably well calibrated, it is very +badly calibrated with respect to the initial states `rho` and also for `mu_background`, where there is substantial bias. So if we were only interested in +a subset of the variables, the optimizing fit could still be on OK choice. + +```{r hmm_optimizing_ecdf_ranks} +plot_ecdf_diff(res_hmm_optimizing) +plot_rank_hist(res_hmm_optimizing) +``` + ## Summary -To summarise, the HMM model turns out to pose minor problems for ADVI that can be partially resolved by tweaking the parameters of the ADVI algorithm. +To summarise, the HMM model turns out to pose minor problems for ADVI that can be partially resolved by tweaking the parameters of the ADVI algorithm. Just using optimizing results in much worse calibration than ADVI. Another relevant question is how much speed we gained. To have a comparison, we run full MCMC with Stan for the same datasets. 
```{r} -res_hmm_sample <- compute_results( +res_hmm_sample <- compute_SBC( ds_hmm[1:50], SBC_backend_cmdstan_sample(model_HMM), keep_fits = FALSE, @@ -460,18 +535,31 @@ plot_rank_hist(res_hmm_sample) ``` For the machine we built the vignette on, here are the distributions -of times (for ADVI) and time of longest chain (for HMC): +of times (for ADVI and optimizing) and time of longest chain (for HMC): ```{r hmm_time} hmm_time <- - rbind(data.frame(alg = "Meanfield", - time = res_hmm$backend_diagnostics$time), + rbind( + data.frame(alg = "Optimizing", + time = res_hmm_optimizing$backend_diagnostics$time), + data.frame(alg = "Meanfield", + time = res_hmm$backend_diagnostics$time), data.frame(alg = "Fullrank", time = res_hmm_fullrank$backend_diagnostics$time), data.frame(alg = "Meanfield + low tol.", time = res_hmm_lowtol$backend_diagnostics$time), data.frame(alg = "Sampling (longest chain)", time = res_hmm_sample$backend_diagnostics$max_chain_time)) + +max_time_optimizing <- round(max(res_hmm_optimizing$backend_diagnostics$time), 2) + + +hmm_time$alg <- factor(hmm_time$alg, + levels = c("Optimizing", + "Meanfield", + "Fullrank", + "Meanfield + low tol.", + "Sampling (longest chain)")) ggplot(hmm_time, aes(x = time)) + geom_histogram(aes(y = ..density..), bins = 20) + @@ -481,7 +569,7 @@ ggplot(hmm_time, aes(x = time)) + Depressingly, while using lower tolerance let us get almost as good uncertainty quantification as sampling, it also erased a big part of the performance advantage variational -inference had over sampling for this model. However, both the fullrank and meanfield approximations provide not-terrible estimates and are noticeably faster than sampling. +inference had over sampling for this model. However, both the fullrank and meanfield approximations provide not-terrible estimates and are noticeably faster than sampling. Optimizing is by far the fastest as the longest time observed is just `r max_time_optimizing` seconds. @@ -531,7 +619,7 @@ generator_HMM_ordered <- function(N) { y <- rpois(N, mu[states]) list( - parameters = list( + variables = list( log_mu = log_mu, # rdirichlet returns matrices, convert to 1D vectors t1 = as.numeric(t1), @@ -547,7 +635,7 @@ generator_HMM_ordered <- function(N) { ``` -So let us build a default variational backend and fit it to just 20 datasets. +So let us build a default variational backend and fit it to just 20 simulations. ```{r} model_HMM_ordered <- cmdstan_model("stan/hmm_poisson_ordered.stan") @@ -559,15 +647,15 @@ backend_HMM_ordered <- SBC_backend_cmdstan_variational(model_HMM_ordered, n_retr set.seed(12333654) ds_hmm_ordered <- generate_datasets( SBC_generator_function(generator_HMM_ordered, N = 100), - n_datasets = 20) + n_sims = 20) res_hmm_ordered <- - compute_results(ds_hmm_ordered, backend_HMM_ordered, + compute_SBC(ds_hmm_ordered, backend_HMM_ordered, cache_mode = "results", cache_location = file.path(cache_dir, "hmm_ordered")) ``` -Immediately we see that the `log_mu[1]` parameter is heavily miscalibrated. +Immediately we see that the `log_mu[1]` variable is heavily miscalibrated. ```{r hmm_ordered_ecdf_ranks} plot_ecdf_diff(res_hmm_ordered) @@ -576,8 +664,8 @@ plot_rank_hist(res_hmm_ordered) What changed? To understand that we need to remember how Stan [represents constrained data types]( https://mc-stan.org/docs/2_28/reference-manual/variable-transforms.html). In short, -in the model in Example II, Stan will internally work with `mu_background__ = log(mu_background)` and -`mu_signal__ = log(mu_signal)`. 
In this modified model, the internal representation will be: `log_mu_1__ = log_mu[1]` (without any change) and `log_mu_2__ = log(log_mu[2] - log_mu[1])`. So the mean for the active component is actually `exp(log_mu_1__ + exp(log_mu_2__))`. This then introduces a complex correlation structure between the internal variables that the ADVI algorithm is unable to handle well.
+in the model in Example II, Stan will internally work with the so-called _unconstrained_ parameters `mu_background__ = log(mu_background)` and
+`mu_signal__ = log(mu_signal)`. In this modified model, the internal representation will be: `log_mu_1__ = log_mu[1]` (without any change) and `log_mu_2__ = log(log_mu[2] - log_mu[1])`. So the mean for the active component is actually `exp(log_mu_1__ + exp(log_mu_2__))`. This then introduces a complex correlation structure between the unconstrained parameters that the ADVI algorithm is unable to handle well.

Even trying the fullrank variant does not help:

@@ -588,7 +676,7 @@ backend_HMM_ordered_fullrank <-
  algorithm = "fullrank", n_retries_init = 3)

res_hmm_ordered_fullrank <-
-  compute_results(ds_hmm_ordered, backend_HMM_ordered,
+  compute_SBC(ds_hmm_ordered, backend_HMM_ordered,
              cache_mode = "results", cache_location = file.path(cache_dir, "hmm_ordered_fullrank"))
```
@@ -600,10 +688,28 @@ plot_ecdf_diff(res_hmm_ordered_fullrank)
plot_rank_hist(res_hmm_ordered_fullrank)
```

+To have a complete overview, we may also try the optimizing fit:
+
+```{r}
+model_HMM_ordered_rstan <- stan_model("stan/hmm_poisson_ordered.stan")
+
+res_hmm_ordered_optimizing <- compute_SBC(
+  ds_hmm_ordered,
+  SBC_backend_rstan_optimizing(model_HMM_ordered_rstan),
+  cache_mode = "results", cache_location = file.path(cache_dir, "hmm_ordered_optimizing"))
+```
+
+In this case, optimizing has better calibration for `log_mu`, but worse calibration for `rho` than ADVI.
+
+```{r hmm_ordered_optimizing_ecdf_ranks}
+plot_ecdf_diff(res_hmm_ordered_optimizing)
+plot_rank_hist(res_hmm_ordered_optimizing)
+```
+
# Conclusion

As this vignette has shown, for some models, ADVI will provide results that are
-close to what we get with sampling, but it may also fail catastrophically on models that are just slightly different. Tweaking the algorithm parameters might also be necessary. ADVI cannot thus be used blindly. Fortunately SBC can be used to check against this type of problem without ever needing to run the full sampling.
+close to what we get with sampling, but it may also fail catastrophically on models that are just slightly different. Tweaking the algorithm parameters might also be necessary. For some cases where ADVI works, the Laplace approximation with optimizing will also work well. ADVI (and optimizing) thus cannot be used blindly. Fortunately, SBC can be used to check against this type of problem without ever needing to run the full sampling.

# Next step: Evolving computation and diagnostic.
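As an illustration of the ordered-vector transform discussed above, the following sketch spells out the mapping for a two-element `ordered` vector; this is a standalone illustration of the transform described in the reference manual, not code from the vignette.

```r
# Stan's transform for a 2-element ordered vector c:
#   u[1] = c[1],  u[2] = log(c[2] - c[1])
# and its inverse:
#   c[1] = u[1],  c[2] = u[1] + exp(u[2])
to_unconstrained <- function(c) c(c[1], log(c[2] - c[1]))
to_ordered       <- function(u) c(u[1], u[1] + exp(u[2]))

log_mu <- c(0.5, 2.3)                  # an ordered log_mu as in the generator
u <- to_unconstrained(log_mu)
all.equal(to_ordered(u), log_mu)       # TRUE

# The mean of the active component depends on both unconstrained coordinates:
# mu[2] = exp(log_mu[2]) = exp(u[1] + exp(u[2]))
exp(u[1] + exp(u[2]))
```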
diff --git a/vignettes/discrete_params.Rmd b/vignettes/discrete_params.Rmd deleted file mode 100644 index b696d97..0000000 --- a/vignettes/discrete_params.Rmd +++ /dev/null @@ -1,212 +0,0 @@ ---- -title: "SBC with discrete parameters" -author: "Martin Modrák" -date: "`r Sys.Date()`" -output: - rmarkdown::html_vignette: - toc: yes -vignette: > - %\VignetteIndexEntry{SBC with discrete parameters} - %\VignetteEngine{knitr::rmarkdown} - \usepackage[utf8]{inputenc} ---- - -SBC was primarily designed for continuous parameters, but can be used -with models that have discrete parameters - whether the parameters -are directly represented (e.g. in JAGS) or marginalized out (as is usual in Stan). - -```{r setup, message=FALSE,warning=FALSE, results="hide"} -library(SBC); -library(ggplot2) - -use_cmdstanr <- TRUE # Set to false to use rstan instead - -if(use_cmdstanr) { - library(cmdstanr) -} else { - library(rstan) -} - -# Multiprocessing support -library(future) -plan(multisession) - -# The fits are very fast and we fit just a few, -# so we force a minimum chunk size to reduce overhead of -# paralellization and decrease computation time. -options(SBC.min_chunk_size = 5) - -# Setup caching of results -cache_dir <- "./discrete_params_SBC_cache" -if(!dir.exists(cache_dir)) { - dir.create(cache_dir) -} - -``` - -We take the changepoint model from: -https://mc-stan.org/docs/2_26/stan-users-guide/change-point-section.html - -```{r, comment = ""} -cat(readLines("stan/discrete_params1.stan"), sep = "\n") -``` - -```{r} -if(use_cmdstanr) { - model_1 <- cmdstan_model("stan/discrete_params1.stan") - backend_1 <- SBC_backend_cmdstan_sample(model_1) -} else { - model_1 <- stan_model("stan/discrete_params1.stan") - backend_1 <- SBC_backend_rstan_sample(model_1) -} -``` - -Now, let's generate data from the model. - -```{r} -generate_single_dataset_1 <- function(T, r_e, r_l) { - e <- rexp(1, r_e) - l <- rexp(1, r_l) - s <- sample.int(T, size = 1) - - y <- array(NA_real_, T) - for(t in 1:T) { - if(t <= s) { - rate <- e - } else { - rate <- l - } - y[t] <- rpois(1, rate) - } - - list( - parameters = list( - e = e, l = l, s = s - ), generated = list( - T = T, - r_e = r_e, - r_l = r_l, - y = y - ) - ) -} - -generator_1 <- SBC_generator_function(generate_single_dataset_1, T = 5, r_e = 0.5, r_l = 0.1) -``` - - -```{r} -set.seed(85394672) -datasets_1 <- generate_datasets(generator_1, 30) - -``` - -```{r} -results_1 <- compute_results(datasets_1, backend_1, - cache_mode = "results", - cache_location = file.path(cache_dir, "model1")) -``` - -Here we also use the caching feature to avoid recomputing the fits when recompiling this vignette. -In practice, caching is not necessary but is often useful. - -TODO the diagnostic failures are false positives, because Rhat and ESS don't work very well for discrete parameters. -We need to figure out how to handle this better. - -We can quickly note that the statistics for the `s` parameter are extreme - many ranks of 0 and _extreme_ z-scores, including -Infinity. Seing just one or two such fits should be enough to convince us that there is something fundamentally wrong. - -```{r} -dplyr::filter(results_1$stats, parameter == "s") -``` - - -Inspecting the statistics shows that quite often, the model is quite sure of the value of `s` while the simulated value is just one less. - -Looking at the `ecdf_diff` plot we see that this seems to compromise heavily the inference for `s`, but the other parameters do not show such bad behaviour. 
- -```{r results1_plots} -plot_ecdf_diff(results_1) -plot_rank_hist(results_1) -``` - -An important note: you may wonder, how we got such a wiggly line for the `s` parameter - doesn't it -have just 5 possible values? Shouldn't therefore the ECDF be one big staircase? -In fact the package does a little trick to make discrete parameters comparable to -continuous - the rank of a discrete parameter is chosen uniformly randomly across -all possible ranks (i.e. posterior draws that have exactly equal value). This -means that if the model is well behaved, ranks for the discrete parameter will -be uniformly distributed across the whole range of possible ranks and we can -use exactly the same diagnostics for a discrete parameter as we do for the -continuous ones. - -But back to the model - what happened? What is wrong with it? After some inspection, you may notice that the simulator does not match the model - the model takes the early rate (`e`) for points `t < s` while the simulator takes `e` for points `t <= s`, so there is effectively a shift by one time point between the simulator and the model. So let's assume that we beleive that the Stan model is in fact right. We therefore updated the simulator to match the model: - - -```{r} -generate_single_dataset_2 <- function(T, r_e, r_l) { - e <- rexp(1, r_e) - l <- rexp(1, r_l) - s <- sample.int(T, size = 1) - - y <- array(NA_real_, T) - for(t in 1:T) { - if(t < s) { ### <--- Only change here - rate <- e - } else { - rate <- l - } - y[t] <- rpois(1, rate) - } - - list( - parameters = list( - e = e, l = l, s = s - ), generated = list( - T = T, - r_e = r_e, - r_l = r_l, - y = y - ) - ) -} - -generator_2 <- SBC_generator_function(generate_single_dataset_2, T = 5, r_e = 0.5, r_l = 0.1) -``` - -And we can recompute: - - -```{r} -set.seed(5846502) -datasets_2 <- generate_datasets(generator_2, 30) -results_2 <- compute_results(datasets_2, backend_1, - cache_mode = "results", - cache_location = file.path(cache_dir, "model2")) -``` - -```{r results_2_plots} -plot_rank_hist(results_2) -plot_ecdf_diff(results_2) -``` - - -Looks good, so let us add some more simulations to make sure the model behaves well. - -```{r} -set.seed(54321488) -datasets_3 <- generate_datasets(generator_2, 100) -results_3 <- compute_results(datasets_3, backend_1, - cache_mode = "results", - cache_location = file.path(cache_dir, "model3")) - -results_all <- bind_results(results_2, results_3) -``` - -```{r results_all_plots} -plot_rank_hist(results_all) -plot_ecdf_diff(results_all) -``` - - -Now - as far as this amount of SBC steps can see, the model is good and we get good behaviour for both the continuous and the discrete parameters. - diff --git a/vignettes/discrete_vars.Rmd b/vignettes/discrete_vars.Rmd new file mode 100644 index 0000000..14c114f --- /dev/null +++ b/vignettes/discrete_vars.Rmd @@ -0,0 +1,322 @@ +--- +title: "SBC with discrete parameters in Stan and JAGS" +author: "Martin Modrák" +date: "`r Sys.Date()`" +output: + rmarkdown::html_vignette: + toc: yes +vignette: > + %\VignetteIndexEntry{SBC with discrete parameters in Stan and JAGS} + %\VignetteEngine{knitr::rmarkdown} + \usepackage[utf8]{inputenc} +--- + +SBC was primarily designed for continuous parameters, but can be used +with models that have discrete parameters - whether the parameters +are directly represented (e.g. in BUGS/JAGS) or marginalized out (as is usual in Stan). 
+ +## Stan version and debugging + +```{r setup, message=FALSE,warning=FALSE, results="hide"} +library(SBC); +library(ggplot2) + +use_cmdstanr <- getOption("SBC.vignettes_cmdstanr", TRUE) # Set to false to use rstan instead + +if(use_cmdstanr) { + library(cmdstanr) +} else { + library(rstan) + rstan_options(auto_write = TRUE) +} + +# Multiprocessing support +library(future) +plan(multisession) + +# The fits are very fast and we fit just a few, +# so we force a minimum chunk size to reduce overhead of +# paralellization and decrease computation time. +options(SBC.min_chunk_size = 5) + +# Setup caching of results +if(use_cmdstanr) { + cache_dir <- "./_discrete_vars_SBC_cache" +} else { + cache_dir <- "./_discrete_vars_rstan_SBC_cache" +} +if(!dir.exists(cache_dir)) { + dir.create(cache_dir) +} + +``` + +We take the changepoint model from: +https://mc-stan.org/docs/2_26/stan-users-guide/change-point-section.html + +```{r, comment = ""} +cat(readLines("stan/discrete_vars1.stan"), sep = "\n") +``` + +```{r} +if(use_cmdstanr) { + model_1 <- cmdstan_model("stan/discrete_vars1.stan") + backend_1 <- SBC_backend_cmdstan_sample(model_1) +} else { + model_1 <- stan_model("stan/discrete_vars1.stan") + backend_1 <- SBC_backend_rstan_sample(model_1) +} +``` + +Now, let's generate data from the model. + +```{r} +generate_single_sim_1 <- function(T, r_e, r_l) { + e <- rexp(1, r_e) + l <- rexp(1, r_l) + s <- sample.int(T, size = 1) + + y <- array(NA_real_, T) + for(t in 1:T) { + if(t <= s) { + rate <- e + } else { + rate <- l + } + y[t] <- rpois(1, rate) + } + + list( + variables = list( + e = e, l = l, s = s + ), generated = list( + T = T, + r_e = r_e, + r_l = r_l, + y = y + ) + ) +} + +generator_1 <- SBC_generator_function(generate_single_sim_1, T = 5, r_e = 0.5, r_l = 0.1) +``` + + +```{r} +set.seed(85394672) +datasets_1 <- generate_datasets(generator_1, 30) + +``` + +Additionally, +we'll add a generated quantity expressing the total log-likelihood of data given the +fitted parameters. The expression within the `generated_quantities()` call is evaluated +for both prior and posterior draws and +included as another variable in SBC checks. It turns out this type of generated quantities +can increase the sensitivity of the SBC against some issues in the model. +See `vignette("limits_of_SBC")` for a more detailed discussion of this. + +```{r} +log_lik_gq <- generated_quantities(log_lik = sum(dpois(y, ifelse(1:T < s, e, l), log = TRUE))) +``` + +So finally, lets actually compute SBC: + +```{r} +results_1 <- compute_SBC(datasets_1, backend_1, + cache_mode = "results", + cache_location = file.path(cache_dir, "model1"), + gen_quants = log_lik_gq) +``` + +Here we also use the caching feature to avoid recomputing the fits when recompiling this vignette. +In practice, caching is not necessary but is often useful. + +TODO the diagnostic failures are false positives, because Rhat and ESS don't work very well for discrete parameters. +We need to figure out how to handle this better. + +We can quickly note that the statistics for the `s` parameter are extreme - many ranks of 0 and _extreme_ z-scores, including -Infinity. Seing just one or two such fits should be enough to convince us that there is something fundamentally wrong. + +```{r} +dplyr::filter(results_1$stats, variable == "s") +``` + + +Inspecting the statistics shows that quite often, the model is quite sure of the value of `s` while the simulated value is just one less. 
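The problematic simulations can also be pulled out programmatically. A small sketch follows; the `variable`, `rank`, `sim_id` and `sd` columns appear elsewhere in this vignette, while `simulated_value` and `mean` are assumed column names used here for illustration.

```r
# Hedged sketch: list the simulations where the true value of `s` fell below
# essentially all posterior draws (rank of 0), to inspect them by hand.
library(dplyr)
results_1$stats %>%
  filter(variable == "s", rank == 0) %>%
  select(sim_id, simulated_value, mean, sd, rank)
```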
+ +Looking at the `ecdf_diff` plot we see that this seems to compromise heavily the inference for `s`, but the other parameters do not show such bad behaviour. Note that the `log_lik` generated quantity shows even starker +failure than `s`, so it indeed poses a stricter check in this scenario. + +```{r results1_plots} +plot_ecdf_diff(results_1) +plot_rank_hist(results_1) +``` + +An important note: you may wonder, how we got such a wiggly line for the `s` parameter - doesn't it +have just 5 possible values? Shouldn't therefore the ECDF be one big staircase? +In fact the package does a little trick to make discrete parameters comparable to +continuous - the rank of a discrete parameter is chosen uniformly randomly across +all possible ranks (i.e. posterior draws that have exactly equal value). This +means that if the model is well behaved, ranks for the discrete parameter will +be uniformly distributed across the whole range of possible ranks and we can +use exactly the same diagnostics for a discrete parameter as we do for the +continuous ones. + +But back to the model - what happened? What is wrong with it? After some inspection, you may notice that the simulator does not match the model - the model takes the early rate (`e`) for points `t < s` while the simulator takes `e` for points `t <= s`, so there is effectively a shift by one time point between the simulator and the model. So let's assume that we beleive that the Stan model is in fact right. We therefore updated the simulator to match the model: + + +```{r} +generate_single_sim_2 <- function(T, r_e, r_l) { + e <- rexp(1, r_e) + l <- rexp(1, r_l) + s <- sample.int(T, size = 1) + + y <- array(NA_real_, T) + for(t in 1:T) { + if(t < s) { ### <--- Only change here + rate <- e + } else { + rate <- l + } + y[t] <- rpois(1, rate) + } + + list( + variables = list( + e = e, l = l, s = s + ), generated = list( + T = T, + r_e = r_e, + r_l = r_l, + y = y + ) + ) +} + +generator_2 <- SBC_generator_function(generate_single_sim_2, T = 5, r_e = 0.5, r_l = 0.1) +``` + +And we can recompute: + + +```{r} +set.seed(5846502) +datasets_2 <- generate_datasets(generator_2, 30) +results_2 <- compute_SBC(datasets_2, backend_1, + gen_quants = log_lik_gq, + cache_mode = "results", + cache_location = file.path(cache_dir, "model2")) +``` + +```{r results_2_plots} +plot_rank_hist(results_2) +plot_ecdf_diff(results_2) +``` + + +Looks good, so let us add some more simulations to make sure the model behaves well. + +```{r} +set.seed(54321488) +datasets_2_more <- generate_datasets(generator_2, 100) +results_2_more <- compute_SBC(datasets_2_more, backend_1, + gen_quants = log_lik_gq, + cache_mode = "results", + cache_location = file.path(cache_dir, "model3")) + +results_2_all <- bind_results(results_2, results_2_more) +``` + +```{r results_2_all_plots} +plot_rank_hist(results_2_all) +plot_ecdf_diff(results_2_all) +``` + + +Now - as far as this amount of SBC steps can see, the model is good and we get good behaviour for both the continuous and the discrete parameters and the `log_lik` generated quantity. Hooray! + +## JAGS version + +We can now write the same model in JAGS. This becomes a bit easier as JAGS lets +us represent discrete parameters directly: + +```{r, comment = ""} +cat(readLines("other_models/changepoint.jags"), sep = "\n") +``` + +We will use the `rjags` package - and relatively large number of samples as +we can expect some autocorrelation in the Gibbs sampler. 
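As a brief aside before fitting the JAGS backend, the uniform tie-breaking for discrete variables described above can be mimicked in a few lines. This is a sketch of the idea only, not the package's internal implementation.

```r
# Randomized rank of a discrete simulated value among tied posterior draws
set.seed(123)
posterior_draws <- rpois(100, lambda = 3)  # stand-in for posterior draws of s
simulated_value <- 3                       # the value used to simulate the data

n_less  <- sum(posterior_draws <  simulated_value)
n_equal <- sum(posterior_draws == simulated_value)

# choose uniformly among the ranks spanned by the tied draws
rank_s <- n_less + sample.int(n_equal + 1, size = 1) - 1
rank_s
```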
+ +```{r} +backend_jags <- SBC_backend_rjags("other_models/changepoint.jags", + variable.names = c("e","l","s"), + n.iter = 10000, + n.burnin = 1000, + n.chains = 4, + thin = 10) +``` + + +Running SBC with all the corrected datasets from before (rJAGS accepts input +data in exactly the same format as Stan, so we can reuse the datasets without any change): + +```{r} +datasets_2_all <- bind_datasets(datasets_2, datasets_2_more) +results_jags <- compute_SBC(datasets_2_all, backend_jags, + gen_quants = log_lik_gq, + cache_mode = "results", + cache_location = file.path(cache_dir, "rjags")) +``` + +Similarly to the case above, the Rhat and ESS warnings are false positives due to the `s` parameter, which +we need to handle better. + +The rank plots show no problems. + +```{r ranks_jags} +plot_rank_hist(results_jags) +plot_ecdf_diff(results_jags) +``` + +As an exercise, we can also write the marginalized version of the model in JAGS. +In some cases, marginalization improves performance even for JAGS models, however, +for this model it is actually not an improvement, presumably because the model +is very simple. + + +```{r, comment = ""} +cat(readLines("other_models/changepoint_marginalized.jags"), sep = "\n") +``` + +The code got quite a bit more complex, se let's check if we didn't mess up +the rewrite - first we +build a backend with this new representation: + +```{r} +backend_jags_marginalized <- SBC_backend_rjags("other_models/changepoint_marginalized.jags", + variable.names = c("e","l","s"), + n.iter = 10000, + n.burnin = 1000, + n.chains = 4, + thin = 10) +``` + +Then we run the actual SBC: + +```{r} +results_jags_marginalized <- compute_SBC(datasets_2_all, backend_jags_marginalized, + gen_quants = log_lik_gq, + cache_mode = "results", + cache_location = file.path(cache_dir, "rjags_marginalized")) +``` + +And the ranks plots look good, so we indeed probably did succeed in correctly marginalizing +the `s` parameter! + +```{r ranks_jags_marginalized} +plot_rank_hist(results_jags_marginalized) +plot_ecdf_diff(results_jags_marginalized) +``` + + + diff --git a/vignettes/eightschools_self-calibration.Rmd b/vignettes/eightschools_self-calibration.Rmd new file mode 100644 index 0000000..1a0fc11 --- /dev/null +++ b/vignettes/eightschools_self-calibration.Rmd @@ -0,0 +1,237 @@ +--- +title: "Running self-calibration for eightschools model." +output: html_notebook +--- + +Setup and dependency imports: + +```{r} +library(cmdstanr) +options(mc.cores = parallel::detectCores()) +library(cmdstanr) +library(ggplot2) +library(posterior) +library(mclust) +library(dplyr) +``` + +Define the number of simulations + +```{r} +nsims = 30 # number of SBC datasets per calibration iteration +ndraws = 1000 # number of posterior draws +nchains = 4 + +``` + + +## Attempt 1 - WRONG GENERATOR +**DO NOT USE THIS AS A REFERENCE. 
INSTEAD TO GO ATTEMPT #2** +Work with centered parameterization first: + +```{r} + +cat( readLines("models/eightschools_cp_calib.stan") , sep = "\n" ) +``` + +Write out the generator for CP eightschools: + +```{r} + +generator_eightschools_cp_gmm <- function(mixture_mean_draws_rvars, mixture_bw_draws_rvars, fixed_values){ + # fixed value across simulated datasets + nsims <- fixed_values$nsims + J <- fixed_values$J + sigma <- fixed_values$sigma + + # Draw tau from the proposed normal mixture + tau <- abs(rvar_rng(rnorm, 1, sample(mixture_mean_draws_rvars$tau, 1), sd = mixture_bw_draws_rvars$tau)) + + # other parameters are drawn from the default prior + mu = rvar_rng(rnorm, 1, mean = 0, sd = 5, ndraws = ndraws(tau)) + theta <- rvar_rng(rnorm, 1, mean = mu, sd = tau) + # draw y from simulated parameters + y <- rvar_rng(rnorm, J, mean = theta, sd = sigma) + + gen_data_draws_rvars <- draws_rvars(J = J, y = y, sigma = sigma, nsims = nsims, mm_mean = mixture_mean_draws_rvars$tau, mm_bandwidth = mixture_bw_draws_rvars$tau) + + SBC_datasets( + parameters = as_draws_matrix(list(tau = tau)), + generated = draws_rvars_to_standata(gen_data_draws_rvars) + ) +} +``` + + +Initialize model data and initial hyperprior values: + +```{r} + +# fixed values taken from: https://github.com/stan-dev/posteriordb/blob/master/posterior_database/data/data-raw/eight_schools/eight_schools.R + +fixed_values <- list(J = 8, nsims = nsims, sigma = rvar(array(rep(as.integer(c(15, 10, 16, 11, 9, 11, 10, 18)), each = nsims), dim = c(nsims, 8)))) + +initial_mixture_mean <- draws_rvars(tau = abs(rvar_rng(rcauchy, nsims, location = 0, scale = 5, ndraws = nsims))) +initial_mixture_bw <- draws_rvars(tau = abs(rvar_rng(rnorm, 1, mean = 1, sd = 0.5, ndraws = nsims))) + +``` + +Compile model and define the SBC backend: + +```{r} +eightschools_cp_model <- cmdstan_model("models/eightschools_cp_calib.stan") +backend_hmc <- SBC_backend_cmdstan_sample(eightschools_cp_model, chains = nchains, iter_sampling = ndraws / nchains) +``` + +Start calibration with an artificial limit to max calibration iterations(10): + +```{r} +calibration_result <- self_calib(generator = generator_eightschools_cp_gmm, backend = backend_hmc, mixture_means_init_draws_rvars = initial_mixture_mean, + mixture_bw_init_draws_rvars = initial_mixture_bw, nsims_fn = function(...){nsims},thin = 4, max_selfcalib_iters = 10, + save_all_results = TRUE, fixed_generator_args = list(fixed_values = fixed_values)) +``` + + +Plot pooled fitted results of tau for each calibration iteration + +```{r} +total_calib_iters <- length(names(calibration_result)) +combined_df <- matrix(nrow = length(names(calibration_result)) * nsims, ncol = 2) +colnames(combined_df) <- c("tau_mean", "iteration") +for(i in 1:length(names(calibration_result))){ + for(j in 1:nsims){ + combined_df[(i-1) * nsims + j, ] <- c(mean(as.vector(subset_draws(calibration_result[[paste0("result_", i)]]$fits[[j]]$draws(), variable = "tau"))), i) + } + +} +combined_df <- data.frame(combined_df) +ggplot(combined_df, aes(x = iteration, y = tau_mean)) + geom_point(mapping = aes(x = iteration, y = tau_mean)) + + stat_summary(fun.data = "mean_sdl", fun.args = list(mult = 2), geom = "pointrange", color = "red")# + scale_y_continuous(limits = c(0, 2000)) + +``` +Manually Take a look at summary statistics: + +```{r} +summ <- combined_df %>% + group_by(iteration) %>% + summarize(mean = mean(tau_mean), median = median(tau_mean), sd = sd(tau_mean)) +summ +``` +Manually inspect spurious statistics + +```{r} 
+calibration_result$result_4$fits[[20]]$cmdstan_summary() +``` +Try plotting SBC for each iteration: + +```{r} +for(i in 1:total_calib_iters){ + print(plot_rank_hist(calibration_result[[paste0("result_", i)]], parameters = "tau")) + +} +``` + +## Attempt 2. Fit GMM in unconstrained space + +Modify stan model to receive samples in R, convert to positive bounded value within the model. + +```{r} + +cat( readLines("models/eightschools_cp_calib.stan") , sep = "\n" ) +``` + +```{r} +generator_eightschools_cp_gmm_unbounded <- function(mixture_mean_draws_rvars, mixture_bw_draws_rvars, fixed_values){ + # fixed value across simulated datasets + nsims <- fixed_values$nsims + J <- fixed_values$J + sigma <- fixed_values$sigma + + + while(1){ + # Draw tau from the proposed normal mixture, unconstrained scale + tau_trans <- rvar_rng(rnorm, 1, sample(mixture_mean_draws_rvars$tau, 1), sd = mixture_bw_draws_rvars$tau) + tau = exp(tau_trans) + + # other parameters are drawn from the default prior + mu = rvar_rng(rnorm, 1, mean = 0, sd = 5, ndraws = ndraws(tau)) + # transform tau to positive values before drawing samples + theta <- rvar_rng(rnorm, 1, mean = mu, sd = tau) + + # draw y from simulated parameters + y <- rvar_rng(rnorm, J, mean = theta, sd = sigma) + + # > mean(c(28, 8, -3, 7, -1, 1, 18, 12)) + sd(c(28, 8, -3, 7, -1, 1, 18, 12)) * 3 + # [1] 40.08118 + + if(any(is.na(y))){ + print("NA values created in generator") + print(mu) + print(tau_trans) + print(tau) + print(theta) + print(sigma) + stop("Terminating") + } + + if(all(draws_of(y) < 40)){ # mean(y) + sd(y) * 3 + break + } + } + + + # we give stan constrained tau values + gen_data_draws_rvars <- draws_rvars(J = J, y = y, sigma = sigma, nsims = nsims, mm_mean = mixture_mean_draws_rvars$tau, mm_bandwidth = mixture_bw_draws_rvars$tau) + + SBC_datasets( + parameters = as_draws_matrix(list(tau = tau)), + generated = draws_rvars_to_standata(gen_data_draws_rvars) + ) +} +``` + +Since tau is now exp(tau_trans), we center the initial mixture to 0 +Bandwidth stays the same. 
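To see what centering at 0 on the unconstrained scale implies for `tau` itself, here is a quick sketch; a bandwidth of roughly 1 is assumed, matching the initialization below.

```r
# With tau = exp(tau_trans), a mixture component centered at 0 puts the
# implied prior for tau around 1 on the constrained scale.
tau_trans <- rnorm(1e5, mean = 0, sd = 1)
quantile(exp(tau_trans), probs = c(0.05, 0.5, 0.95))
```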
+ +```{r} +fixed_values_unbounded <- list(J = 8, nsims = nsims, sigma = rvar(array(rep(as.integer(c(15, 10, 16, 11, 9, 11, 10, 18)), each = nsims), dim = c(nsims, 8)))) + +initial_mixture_mean_unbounded <- draws_rvars(tau = rvar_rng(rcauchy, nsims, location = 0, scale = 0.01, ndraws = nsims)) +initial_mixture_bw_unbounded <- draws_rvars(tau = abs(rvar_rng(rnorm, 1, mean = 1, sd = 0.5, ndraws = nsims))) +``` + +Run self_calib again, with the same artificial max interaction limit: + + +```{r} +calibration_result_unbounded <- self_calib(generator = generator_eightschools_cp_gmm_unbounded, backend = backend_hmc, mixture_means_init_draws_rvars = initial_mixture_mean_unbounded, + mixture_bw_init_draws_rvars = initial_mixture_bw_unbounded, nsims_fn = function(...){nsims},thin = 4, max_selfcalib_iters = 10, + save_all_results = TRUE, fixed_generator_args = list(fixed_values = fixed_values_unbounded)) +``` + + + + +```{r} +total_calib_iters <- length(names(calibration_result_unbounded)) +combined_df <- matrix(nrow = length(names(calibration_result_unbounded)) * nsims, ncol = 2) +colnames(combined_df) <- c("tau_mean", "iteration") +for(i in 1:length(names(calibration_result_unbounded))){ + for(j in 1:nsims){ + combined_df[(i-1) * nsims + j, ] <- c(mean(as.vector(subset_draws(calibration_result_unbounded[[paste0("result_", i)]]$fits[[j]]$draws(), variable = "tau"))), i) + } + +} +combined_df <- data.frame(combined_df) +ggplot(combined_df, aes(x = iteration, y = tau_mean)) + geom_point(mapping = aes(x = iteration, y = tau_mean)) + + stat_summary(fun.data = "mean_sdl", fun.args = list(mult = 2), geom = "pointrange", color = "red") + scale_x_continuous(limits = c(0, 5))# + scale_y_continuous(limits = c(0, 100)) +``` + + + +```{r} +for(i in 1:total_calib_iters){ + print(plot_rank_hist(calibration_result_unbounded[[paste0("result_", i)]], parameters = "tau")) +} +``` + diff --git a/vignettes/implementing_backends.Rmd b/vignettes/implementing_backends.Rmd index 522829e..5a78fcc 100644 --- a/vignettes/implementing_backends.Rmd +++ b/vignettes/implementing_backends.Rmd @@ -13,7 +13,7 @@ vignette: > This vignette will discuss how to implement a new type of backend for the SBC package and thus allow you to integrate the SBC package with any method/algorithm that -can produce samples. +can produce draws from a posterior distribution or its approximation. As an example, we'll wrap the base R `glm` function as a backend. This will also let us discuss how we can treat frequentist models as approximations to Bayesian models and that SBC can tell us how faithful such an approximation is. @@ -31,7 +31,7 @@ library(formula.tools) library(MASS) # Setup caching of results -cache_dir <- "./implementing_backends_SBC_cache" +cache_dir <- "./_implementing_backends_SBC_cache" if(!dir.exists(cache_dir)) { dir.create(cache_dir) } @@ -41,16 +41,16 @@ if(!dir.exists(cache_dir)) { ## Minimal backend support If you remember from the, [interface introduction vignette](https://hyunjimoon.github.io/SBC/articles/basic_usage.html) -a backend for the SBC package describes a function that takes in data and produces samples, +a backend for the SBC package describes a function that takes in data and produces posterior draws, i.e. the backend holds all the information other than data that are needed to run the given statistical method. 
For practical reasons, the SBC package actually splits that function into two steps: first, there is an S3 generic [`SBC_fit()`](https://hyunjimoon.github.io/SBC/reference/SBC_fit.html), -that takes a backend object, dataset and the number of cores it is allowed to use +that takes a backend object, observed data and the number of cores it is allowed to use and produces an arbitrary object representing the fit. Additionally, there is an `SBC_fit_to_draws_matrix()` S3 generic that takes in the resulting fit -and returns posterior samples in the `posterior::draws_matrix` format. +and returns posterior draws in the `posterior::draws_matrix` format. The split here is useful because it lets the `SBC_results` object to store the raw fit objects, which can then be inspected by user for debugging purposes. The SBC package @@ -67,7 +67,7 @@ of the backend. Here, we'll just capture all the arguments (which we will later SBC_backend_glm <- function(...) { args = list(...) if(any(names(args) == "data")) { - stop(paste0("Parameter 'data' cannot be provided when defining a backend", + stop(paste0("Argument 'data' cannot be provided when defining a backend", " as it needs to be set by the SBC package")) } @@ -79,7 +79,7 @@ So e.g. `SBC_backend_glm(y ~ x, family = "poisson")` would create a valid backend representing a simple Poisson regression. Now we create an implementation of `SBC_fit` for the newly created class. -We take the generated dataset (`generated` parameter) and pass it - along +We take the generated data (`generated` argument) and pass it - along with all the arguments we stored in the constructor - to `glm` via `do.call`. We ignore the `cores` argument as we don't have multicore support. @@ -108,7 +108,7 @@ In this way we can see `glm` as an approximate Bayesian method where: And that's exactly what we'll do: the `coef` method for `glm` fit returns the MLE and the `vcov` method returns the variance-covariance matrix implied by the -Hessian, so all we need is to take a bunch of samples (here 1000) from this +Hessian, so all we need is to take a bunch of draws (here 1000) from this multivariate normal. Therefore, the implementation is also very simple: ```{r} @@ -134,8 +134,8 @@ generator_single_poisson <- function(N) { y <- rpois(N, exp(mus)) list( - parameters = list( - # Naming the parameters in the same way glm will name coefs + variables = list( + # Naming the variables in the same way glm will name coefs `(Intercept)` = log_intercept, x = beta ), @@ -145,14 +145,14 @@ generator_single_poisson <- function(N) { set.seed(354662) datasets_poisson <- generate_datasets(SBC_generator_function(generator_single_poisson, N = 100), - n_datasets = 100) + n_sims = 100) ``` Then we'll construct a matching backend and compute the results. ```{r} backend_poisson <- SBC_backend_glm(y ~ x, family = "poisson") -res_poisson <- compute_results(datasets_poisson, +res_poisson <- compute_SBC(datasets_poisson, backend_poisson, thin_ranks = 1, cache_mode = "results", @@ -160,7 +160,7 @@ res_poisson <- compute_results(datasets_poisson, ``` -We have set `thin_ranks = 1` as no thinning is needed (the samples are i.i.d. by +We have set `thin_ranks = 1` as no thinning is needed (the draws are i.i.d. by construction). 
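The draws-matrix method described above is not part of this diff, so for reference here is a minimal sketch of what such a method could look like; the vignette's actual chunk may differ in details such as the number of draws.

```r
# Hedged sketch: turn a fitted glm into 1000 approximate posterior draws by
# sampling from the multivariate normal implied by the MLE and its vcov matrix.
SBC_fit_to_draws_matrix.glm <- function(fit) {
  draws <- MASS::mvrnorm(n = 1000, mu = coef(fit), Sigma = vcov(fit))
  posterior::as_draws_matrix(draws)
}
```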
The rank and ecdf plots show no big problems @@ -170,11 +170,11 @@ plot_rank_hist(res_poisson) plot_ecdf_diff(res_poisson) ``` -This is not unexpected - we've used a large dataset and a simple model, so +This is not unexpected - we've used a large number of observations and a simple model, so choice of prior should have negligible impact on the posterior and the normal approximation is very close to the exact Bayesian solution. -We can see that both model parameters are recovered almost exactly in almost all +We can see that both variables are recovered almost exactly in almost all fits: ```{r poisson_sim_estimated} @@ -189,13 +189,13 @@ comfortable to use. Let's walk through the options and se and how they can be im `glm` wrapper. Since (unlike MCMC methods) the `glm` approximation does -not produce autocorrelated samples, we can implement `SBC_backend_iid_samples` +not produce autocorrelated draws, we can implement `SBC_backend_iid_draws` to return `TRUE`. The SBC package will then by default use `thin_ranks = 1` -argument to `compute_results` and will not assess convergence/autocorrelation via the +argument to `compute_SBC` and will not assess convergence/autocorrelation via the R-hat and ESS diagnostics. ```{r} -SBC_backend_iid_samples.SBC_backend_glm <- function(backend) { +SBC_backend_iid_draws.SBC_backend_glm <- function(backend) { TRUE } ``` @@ -312,7 +312,7 @@ about parallel support. SBC uses the `future` package to allow paralellization. This means that when user sets up a parallel environment (e.g. via `plan(multisession)`), -the `SBC_fit()`, `SBC_fit_to_draws_matrix()` and `SBC_backend_iid_samples()` +the `SBC_fit()`, `SBC_fit_to_draws_matrix()` and `SBC_backend_iid_draws()` implementations will run in a fresh session. To make this work smoothly, the functions should call non-base R functions explicitly via namespace declaration (e.g. note that we call `MASS::mvrnorm`, not just `mvrnorm`). @@ -321,13 +321,13 @@ If you are implementing the backend to become part of the SBC package, nothing more is needed for paralellization to work. If however you are just building an ad-hoc backend that lives in your global environment, you will also need to pass the three functions -to the `globals` argument of `compute_results` which will make them available on +to the `globals` argument of `compute_SBC` which will make them available on all workers i.e. use: ``` -compute_results(..., globals = c("SBC_fit.SBC_backend_glm", +compute_SBC(..., globals = c("SBC_fit.SBC_backend_glm", "SBC_fit_to_draws_matrix.glm", - "SBC_backend_iid_samples.SBC_backend_glm")) + "SBC_backend_iid_draws.SBC_backend_glm")) ``` @@ -355,23 +355,23 @@ by running only the approximate method (a lot of times) and look at SBC results. This may still be faster than running a single fully Bayesian fit. Additionally, fitting with an approximate algorithm can be useful to run approximate power calculations where it lets us cheaply fit a lot of -simulated datasets to e.g. understand how +simulations to e.g. understand how the width of our posterior intervals changes with sample size and at the same time we learn, whether the approximation is problematic in some way. -For the sake of example, let's assume we've already gathered a dataset that +For the sake of example, let's assume we've already gathered data that we want to analyze with Bayesian logistic regression. So our data generating process will use the observed covariate values but simulate new coefficients and outcome data. 
Below is a simple implementation with normal priors on the intercept and predictors. Note that we do some [rejection sampling](https://hyunjimoon.github.io/SBC/articles/rejection_sampling.html) -here to avoid using datasets where the generated response +here to avoid using simulations where the generated response is the same or almost the same for all rows. ```{r} generator_single_logistic <- function(formula, - dataset, + template_data, intercept_prior_loc = 0, intercept_prior_width = 2, predictor_prior_loc = 0, @@ -381,7 +381,7 @@ generator_single_logistic <- function(formula, stop("The formula has to have just a single response") } - X <- model.matrix(formula, dataset) + X <- model.matrix(formula, template_data) repeat { coefs <- rnorm(ncol(X), predictor_prior_loc, sd = predictor_prior_width) @@ -398,18 +398,18 @@ generator_single_logistic <- function(formula, } } - dataset_mod <- dataset - dataset_mod[[response_name]] <- y + data_mod <- template_data + data_mod[[response_name]] <- y list( - parameters = as.list(coefs), - generated = dataset_mod + variables = as.list(coefs), + generated = data_mod ) } ``` -We are going to use the `indo_rct` dataset from the `medicaldata` package. The +We are going to use the `indo_rct` dataset from the `medicaldata` package as a template. The dataset contains the results of a randomized, placebo-controlled, prospective 2-arm trial of indomethacin 100 mg PR once vs. placebo to prevent post-ERCP Pancreatitis in 602 patients. You can inspect the [codebook](https://htmlpreview.github.io/?https://github.com/higgi13425/medicaldata/blob/master/man/codebooks/indo_rct_codebook.html) as well as the [published paper](https://www.nejm.org/doi/full/10.1056/NEJMoa1111103) online. The citation for the paper is: @@ -427,15 +427,15 @@ set.seed(6524243) datasets_indo_simple <- generate_datasets(SBC_generator_function( generator_single_logistic, formula = formula_indo_simple, - dataset = medicaldata::indo_rct), - n_datasets = 500) + template_data = medicaldata::indo_rct), + n_sims = 500) backend_indo_simple <- SBC_backend_glm(formula = formula_indo_simple, family = "binomial") ``` ```{r} -res_indo_simple <- compute_results(datasets_indo_simple, backend_indo_simple, +res_indo_simple <- compute_SBC(datasets_indo_simple, backend_indo_simple, cache_mode = "results", cache_location = file.path(cache_dir,"indo_simple")) ``` @@ -456,7 +456,7 @@ plot_coverage(res_indo_simple) we can make this precise by inspecting the same results numerically: ```{r} -stats_effect <- res_indo_simple$stats[res_indo_simple$stats$parameter == "rx1_indomethacin",] +stats_effect <- res_indo_simple$stats[res_indo_simple$stats$variable == "rx1_indomethacin",] main_eff_coverage <- empirical_coverage(stats_effect, width = c(0.5,0.9, 0.95)) main_eff_coverage ``` @@ -474,15 +474,16 @@ plot_sim_estimated(res_indo_simple) ``` There is a simulation where the posterior uncertainty is very large. 
-This corresponds to dataset where the outcome is the same for all +This corresponds to observed data where the outcome is the same for all rows where the treatment was used: ```{r} -biggest_sd_dataset <- res_indo_simple$stats$dataset_id[ +biggest_sd_sim <- res_indo_simple$stats$sim_id[ which.max(res_indo_simple$stats$sd)] -table(datasets_indo_simple$generated[[biggest_sd_dataset]][c("outcome", "rx")]) +table(datasets_indo_simple$generated[[biggest_sd_sim]][c("outcome", "rx")]) ``` -Filtering the extreme datasets out, we see that most commonly, we + +Filtering the extreme simulations out, we see that most commonly, we get a decently precise estimate. ```{r indo_simple_sim_est} @@ -517,8 +518,8 @@ formula_indo_complex <- outcome ~ rx + site + gender + age + risk datasets_indo_complex <- generate_datasets(SBC_generator_function( generator_single_logistic, formula = formula_indo_complex, - dataset = indo_rct_complex), - n_datasets = 500) + template_data = indo_rct_complex), + n_sims = 500) backend_indo_complex <- SBC_backend_glm(formula = formula_indo_complex, family = "binomial") ``` @@ -527,7 +528,7 @@ backend_indo_complex <- SBC_backend_glm(formula = formula_indo_complex, family = Now we are ready to run SBC: ```{r} -res_indo_complex <- compute_results(datasets_indo_complex, backend_indo_complex, +res_indo_complex <- compute_SBC(datasets_indo_complex, backend_indo_complex, cache_mode = "results", cache_location = file.path(cache_dir,"indo_complex")) ``` @@ -552,7 +553,7 @@ The main effect of interest (`rx1_indomethacin`) is however still reasonably wel ```{r} -stats_effect <- res_indo_complex$stats[res_indo_complex$stats$parameter == "rx1_indomethacin",] +stats_effect <- res_indo_complex$stats[res_indo_complex$stats$variable == "rx1_indomethacin",] main_eff_coverage <- empirical_coverage(stats_effect, width = c(0.5,0.9, 0.95)) main_eff_coverage ``` @@ -569,21 +570,21 @@ set.seed(1685554) datasets_indo_complex_narrow <- generate_datasets(SBC_generator_function( generator_single_logistic, formula = formula_indo_complex, - dataset = indo_rct_complex, + template_data = indo_rct_complex, intercept_prior_loc = 3, intercept_prior_width = 0.5, predictor_prior_loc = c(-2, 2), predictor_prior_width = 0.5), - n_datasets = 500) + n_sims = 500) ``` ```{r} -res_indo_complex_narrow <- compute_results(datasets_indo_complex_narrow, backend_indo_complex, +res_indo_complex_narrow <- compute_SBC(datasets_indo_complex_narrow, backend_indo_complex, cache_mode = "results", cache_location = file.path(cache_dir,"indo_complex_narrow")) ``` -This is enough to make basically all the parameters poorly calibrated: +This is enough to make basically all the variables poorly calibrated: ```{r indo_complex_narrow_ranks} plot_rank_hist(res_indo_complex_narrow) @@ -601,16 +602,16 @@ set.seed(3289542) datasets_indo_simple_narrow <- generate_datasets(SBC_generator_function( generator_single_logistic, formula = formula_indo_simple, - dataset = medicaldata::indo_rct, + template_data = medicaldata::indo_rct, intercept_prior_loc = 3, intercept_prior_width = 0.5, predictor_prior_loc = c(-2, 2), predictor_prior_width = 0.5), - n_datasets = 500) + n_sims = 500) ``` ```{r} -res_indo_simple_narrow <- compute_results(datasets_indo_simple_narrow, backend_indo_simple, +res_indo_simple_narrow <- compute_SBC(datasets_indo_simple_narrow, backend_indo_simple, cache_mode = "results", cache_location = file.path(cache_dir,"indo_simple_narrow")) ``` diff --git a/vignettes/indexing.Rmd b/vignettes/indexing.Rmd index 56f6940..77d73a6 100644 --- 
a/vignettes/indexing.Rmd +++ b/vignettes/indexing.Rmd @@ -20,16 +20,21 @@ Let's setup the environment. library(SBC); library(ggplot2) -use_cmdstanr <- TRUE # Set to false to use rstan instead +use_cmdstanr <- getOption("SBC.vignettes_cmdstanr", TRUE) # Set to false to use rstan instead if(use_cmdstanr) { library(cmdstanr) } else { library(rstan) + rstan_options(auto_write = TRUE) } # Setup caching of results -cache_dir <- "./indexing_SBC_cache" +if(use_cmdstanr) { + cache_dir <- "./_indexing_SBC_cache" +} else { + cache_dir <- "./_indexing_rstan_SBC_cache" +} if(!dir.exists(cache_dir)) { dir.create(cache_dir) } @@ -86,7 +91,7 @@ worry about performance here. ```{r} -single_dataset_regression <- function(N, K) { +single_sim_regression <- function(N, K) { x <- matrix(rnorm(n = N * K, mean = 0, sd = 1), nrow = N, ncol = K) alpha <- rnorm(n = 1, mean = 0, sd = 1) beta <- rnorm(n = K, mean = 0, sd = 1) @@ -102,7 +107,7 @@ single_dataset_regression <- function(N, K) { } list( - parameters = list( + variables = list( alpha = alpha, beta = beta, sigma = sigma), @@ -115,26 +120,26 @@ single_dataset_regression <- function(N, K) { } ``` -We'll start with just 10 datasets to get a quick computation - this will still +We'll start with just 10 simulations to get a quick computation - this will still let us see big problems (but not subtle issues) ```{r} set.seed(5666024) datasets_regression <- generate_datasets( - SBC_generator_function(single_dataset_regression, N = 100, K = 2), 10) + SBC_generator_function(single_sim_regression, N = 100, K = 2), 10) ``` Now we can use all of the backends to fit the generated datasets. ```{r} -results_regression_1 <- compute_results(datasets_regression, backend_regression_1, +results_regression_1 <- compute_SBC(datasets_regression, backend_regression_1, cache_mode = "results", cache_location = file.path(cache_dir, "regression1")) -results_regression_2 <- compute_results(datasets_regression, backend_regression_2, +results_regression_2 <- compute_SBC(datasets_regression, backend_regression_2, cache_mode = "results", cache_location = file.path(cache_dir, "regression2")) -results_regression_3 <- compute_results(datasets_regression, backend_regression_3, +results_regression_3 <- compute_SBC(datasets_regression, backend_regression_3, cache_mode = "results", cache_location = file.path(cache_dir, "regression3")) ``` @@ -160,7 +165,7 @@ plot_rank_hist(results_regression_2) But the second model is actually not looking good. In fact there is an indexing bug. The problem is the line `mu[i] += beta[j] * x[j, j];` which should have `x[i, j]` instead. 
We see that this
-propagates most strongly to the `sigma` parameter (reusing the same `x` element leads to more similar predictions for each row, so `sigma` needs to be inflated to accommodate this)
+propagates most strongly to the `sigma` variable (reusing the same `x` element leads to more similar predictions for each row, so `sigma` needs to be inflated to accommodate this)

```{r plots_regression3}
diff --git a/vignettes/iter-binom-logistic.Rmd b/vignettes/iter-binom-logistic.Rmd new file mode 100644 index 0000000..60e155a --- /dev/null +++ b/vignettes/iter-binom-logistic.Rmd @@ -0,0 +1,810 @@
+---
+title: "iter-binomial-laplace"
+output: html_document
+author: "Hyunji Moon"
+---
+```{r setup, include=FALSE, warning=FALSE}
+library(SBC)
+library(cmdstanr)
+library(parallel)
+library(bayesplot)
+library(posterior)
+library(dplyr)
+library(rstan)
+library(future)
+library(ggpubr)
+library(rstanarm)
+library(ggplot2)
+options(mc.cores = parallel::detectCores())
+plan(multisession)
+options(SBC.min_chunk_size = 5)
+set.seed(1984)
+```
+
+
+Definitions:
+
+- simulation prior := prior distribution that informs the prior predictive simulations and is being iteratively updated
+
+- inference prior := prior distribution that informs the posterior distribution, and hence the posterior sampling method
+
+- prior predictive distribution $P(y)$ := marginal distribution of $y$, $p(y)$
+
+- data-averaged posterior $P(\theta')$ := combined posterior samples from all simulated datasets
+
+- posterior sampling method := a.k.a. inference algorithm; its output distribution is a function of the prior predictive distribution and the inference prior
+
+- default prior of chosen likelihood and posterior sampling method := a prior wide enough not to hurt self-consistency
+
+The prior predictive distribution is determined by two components: the prior distribution and the likelihood.
+$$p(y_1,..,y_n) = \int \prod_{i=1}^{n} f(y_i|\theta)p(\theta)d\theta$$
+
+Let us denote the distributions of the likelihood and the posterior sampling method as $F$ and $G^{-1}$. The inverse is used because most posterior sampling methods use the likelihood distribution in reverse to minimize the distance between the target and the generated distribution. In the SBC package, a `glm` formula used for the predictive distribution can serve as an approximation, which we call a backend. See the [implementing_backends](https://hyunjimoon.github.io/SBC/articles/implementing_backends.html) vignette for this.
+
+![F, G_t^{-1}, and the regularizer determine the converged distribution, which we call the default prior.](iter_overview.png)
+
+Proposition: Iterating prior predictive simulation, posterior sampling, and regularizing converges to a default prior for a given prior distribution family, likelihood, and inference algorithm.
+
+This is due to the recurrence of well-calibrated regions, which will be illustrated in Experiment 1, showing different priors converging to the same distribution for a simple Bernoulli likelihood and the Laplace approximation as the inference algorithm. This approximation truncates the Taylor expansion of the log target density at the mode to the second order, i.e. $\mu_{t+1} = \operatorname{argmax}_w f(w)$ and $\sigma_{t+1} = \left(-\frac{d^{2}}{d w^{2}} \log f(w)\big|_{w=w_{0}}\right)^{-1/2}$ with $w_0 = \mu_{t+1}$.
+
+# PASSIVE UPDATE
+# Experiment 1. 1. Normal simulation prior (samples: 100), 2. Bernoulli-logit prior predictive simulation, 3. Laplace approximation posterior sampling (draws: 100), 4.
Plugging mean and sd of the data-averaged posterior to the next simulation prior as the regularizer + +Target parameter is logit-transformed probability, $a$. Binomial likelihood and laplace approximation inference algorithm on logit scale is used. Hyperparameters for laplace approximation are $\mu, \sigma$ which correspond to posterior distribution mode and second derivative at the mode. These hyperparameter values are set as the prior parameter for the iteration. Results show starting from $N(0, 3^2)$ distribution, initial non-normal distribution slowly transforms to normal form to adjust to the constraints imposed by the approximation of inference algorithm, in this case normal distribution. Final convergence is around $N(0, 0.5^2)$. + +```{r, warning=FALSE, error=FALSE} +generator_binom <- function(lambda_mu, lambda_sigma, fixed_sim_args){ + # fixed value across simulated datasets + # experiment settings + nobs <- fixed_sim_args$nobs + link_type <- fixed_sim_args$link_type + # clamped parameters + + # generate + eta <- rnorm(1, mean = lambda_mu, sd=lambda_sigma) + mu <- invtf_param_vec(eta, link_type = link_type) + Y <- rbinom(nobs, size = nsize, prob = mu) + list( + parameters = list(eta = eta), + generated = list(nobs= nobs, K = K, X = X, shape = shape, + lambda_mu = lambda_mu, lambda_log_sigma = log(lambda_sigma), + Y = Y) + ) +} +fixed_sim_args_binom <- list(nobs = nobs, link_type = 1) +datasets_binom <- generate_datasets(SBC_generator_function(generator_binom, lambda_mu, lambda_sigma, fixed_sim_args_binom), n_datasets = nsims) + +# prior hyperparameters +lambda_mu <- 2 +lambda_sigma <- 5 + +# link function (1 = logit, 2 = probit, 3 = cloglog) +link <- 1 + +# maximal number of SBC iterations +niter <- 100 + +# number of SBC simulations per iteration (generator) +nsims <- 80 + +# number of draws per posterior approximation (backend) +ndraws <- 100 + +# number of observations +nobs <- 2 + +# number of binomial trials per observation +nsize <- 10 + +nchains <- 1 +# tolerance +tol <- 0.02 + +# learning rate +gamma <- 0.5 + +# hyperparameter update type +updator = "heuristic" + +# step2: posterior sampling +rstan_mod <- stan_model("models/binom-laplace.stan") +cmdstan_mod <- cmdstanr::cmdstan_model("models/binom-laplace.stan") +rstan_backend_hmc <- SBC_backend_rstan_sample(rstan_mod, chains = nchains, iter = ndraws / nchains + 1000, warmup=1000) + +backend_vi <- SBC_backend_cmdstan_variational(cmdstan_mod, output_samples = ndraws, algorithm = "fullrank") + +# TODO +#backend_laplace <- SBC_backend_rstan_laplace(rstan_mod, iter_sampling = ndraws) +# backend_hmc <- SBC_backend_cmdstan_sample(cmdstan_mod, chains = 2, iter_sampling = ndraws / 2) if cmdstanr return hession + +# initial badly calibrated +result_hmc <- compute_results(datasets, rstan_backend_hmc, thin_ranks = 1) +result_vi <- compute_results(datasets, backend_vi, thin_ranks = 1) +plot_rank_hist(result_hmc) +plot_rank_hist(result_vi) + +# step3: updating hyperparmeters +param_sc_hmc <- self_calib_adaptive(generator_binom, rstan_backend_hmc, updator, "eta", lambda_mu, lambda_sigma, nsims, tol, fixed_args = list(fixed_sim_args = fixed_sim_args)) +plot_rank_hist(param_sc_hmc) + +param_sc_vi <- self_calib_adaptive(generator_binom, backend_vi, updator, "eta", lambda_mu, lambda_sigma, nsims, tol, fixed_args = list(fixed_sim_args = fixed_sim_args)) +plot_rank_hist(param_sc_vi) +``` + + +HMC is underdispersed. ADVI is skewed to the right and has a tendency to under-estimate. 
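One way to see why a well-calibrated prior is a fixed point of this passive update is the standard data-averaged posterior identity: if the inference step returns the exact posterior under the same prior that generated the simulations, then

$$
\int p(\theta' \mid y)\, p(y)\, \mathrm{d}y
  = \int\!\!\int p(\theta' \mid y)\, f(y \mid \theta)\, p(\theta)\, \mathrm{d}\theta\, \mathrm{d}y
  = p(\theta'),
$$

so pooling posterior draws across simulated datasets reproduces the simulation prior, and plugging its mean and sd back in leaves the hyperparameters (approximately) unchanged. The deviations observed above therefore reflect the error of the approximate inference, which is what the iteration adapts to.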
+ +```{r warning=FALSE} +generator_gamma <- function(lambda_mu, lambda_sigma, fixed_sim_args){ + # fixed value across simulated datasets + ## meta + nobs <- fixed_sim_args$nobs + K <- fixed_sim_args$K + shape <- fixed_sim_args$shape + + # predictor + X <- array(rnorm(nobs * K, mean = 1, sd = 1), dim = c(nobs, K)) + b <- rnorm(K, mean = 0, sd = 1) + # generate + eta <- rnorm(1, mean = lambda_mu, sd=lambda_sigma) + logmu <- as.numeric(eta+ X %*% b) + mu <- exp(logmu) + Y <- rgamma(nobs, shape = shape, scale = mu/shape) + list( + parameters = list(eta = eta), + generated = list(nobs= nobs, K = K, X = X, shape = shape, + lambda_mu = lambda_mu, lambda_log_sigma = log(lambda_sigma), + Y = Y) + ) +} + +``` + +```{r, warning=FALSE, error=FALSE} +backend_vi <- SBC_backend_cmdstan_variational(mod_gr, output_samples = M, algorithm = "fullrank") +result_25_vi <- compute_results(datasets_25, backend_vi, thin_ranks = 1) +plot_rank_hist(result_25_vi) +param_sc_vi <- self_calib_adaptive(generator_binom, cmdstan_backend_vi, "eta", lambda_mu, lambda_sigma, nsims, tol, fixed_args = list(fixed_sim_args = fixed_sim_args)) +plot_rank_hist(param_sc_vi) +``` + +Would this converging distribution be unique? From the result below, starting from $N(3, 1^2)$ which is unsymmetrical compared to $N(0, 3^2)$, also converges to a similar distribution. A likeable explanation is the recurrence of parameter values within well-calibrated region as opposed to that are not. For instance, if $logit^{-1}(\theta)$ starts from 0.9, is likely to form a non-normal posterior whose mode is more likely to move away from 0.9. On the other hand, parameter values near .5 forms a symmetric and stable posterior which in most cases has its mode near .5. In this case, the disstribution converge to around $N(0, 0.4^2)$ + +```{r, warning=FALSE, error=FALSE} +model = stan_model("./models/binom-laplace.stan") +SBC_iter <- 1000 +# prior hyperparameters +mu <- 3 +sigma <- 1 +mu_lst <- c() +sigma_lst <- c() +# experiment settings +## the number of dataset +nsims <- 100 +## outcome dimension for each dataset +nobs <- 2 +## posterior samples for each dataset +ndraws <- 100 +## number of binomial trials +nsize <- 1 +for (j in 1:SBC_iter){ + post_draws_theta <- c() + theta <- rnorm(nsims, mu, sigma) + for (i in 1:nsims) { + p <- invlogit(theta[i]) + y <- rbinom(nobs, nsize, p) + dat <- list(Y=as.array(y), nsize=nsize, nobs=nobs, mu = mu, sigma = sigma) + fit <- optimizing(model, data = dat, hessian = TRUE) + + # approximate posterior mean via posterior mode + post_mean_theta <- fit$par["theta"] + + # approximate posterior sd via (sqrt) of the inverse negative Hessian + post_sd_theta <- sqrt(solve(-fit$hessian)) + post_draws_theta <- c(post_draws_theta, rnorm(ndraws, post_mean_theta, post_sd_theta)) + } + if ((j-1) %% 300 == 0){ + hist(invlogit(post_draws_theta), xlim = range(0,1), main = paste(j, "th itheration histogram")) + } + + # regularizer: update hyperparameters + mu_est <- mean(post_draws_theta) + mu <- mu_est + sigma_est <- sd(post_draws_theta) + sigma <- sigma_est + + if ((j-1) %% 30 ==0){ + hist(invlogit(post_draws_theta), xlim = range(0,1), main = paste(j, "th itheration histogram")) + } + mu_lst <- c(mu_lst, mu) + sigma_lst <- c(sigma_lst, sigma) +} +plot(unlist(mu_lst), ylab = "prior mean") +plot(unlist(sigma_lst), ylab = "prior sd") +print(sigma_lst[length(sigma_lst)]) +``` + +# Experiment 2. Change experimental settings +## 2-1. Decreasing the number of prior samples from 100 to 10 + +Drastic jumps are observed. 
This may be due to simultation noise but observed oscillation of after the jump tells zero calibration bias prior is the form of a point, not a range. + +```{r, warning=FALSE, error=FALSE} +model = stan_model("./models/binom-laplace.stan") +SBC_iter <- 10000 +# prior hyperparameters +mu <- 1 +sigma <- 10 +mu_lst <- c() +sigma_lst <- c() +# experiment settings +## the number of dataset +nsims <- 100 +## outcome dimension for each dataset +nobs <- 2 +## posterior samples for each dataset +ndraws <- 100 +## number of binomial trials +nsize <- 1 +for (j in 1:SBC_iter){ + post_draws_theta <- c() + theta <- rnorm(nsims, mu, sigma) + for (i in 1:nsims) { + p <- invlogit(theta[i]) + y <- rbinom(nobs, nsize, p) + dat <- list(Y=as.array(y), nsize=nsize, nobs=nobs, mu = mu, sigma = sigma) + fit <- optimizing(model, data = dat, hessian = TRUE) + + # approximate posterior mean via posterior mode + post_mean_theta <- fit$par["theta"] + + # approximate posterior sd via (sqrt) of the inverse negative Hessian + post_sd_theta <- sqrt(solve(-fit$hessian)) + post_draws_theta <- c(post_draws_theta, rnorm(ndraws, post_mean_theta, post_sd_theta)) + } + + # regularizer: update hyperparameters + mu_est <- mean(post_draws_theta) + mu <- mu_est + sigma_est <- sd(post_draws_theta) + sigma <- sigma_est + + if ((j-1) %% 30 ==0){ + hist(invlogit(post_draws_theta), xlim = range(0,1), main = paste(j, "th itheration histogram")) + } + mu_lst <- c(mu_lst, mu) + sigma_lst <- c(sigma_lst, sigma) +} +plot(unlist(mu_lst), ylab = "prior mean") +plot(unlist(sigma_lst), ylab = "prior sd") +``` + +## 2-2. Increasing outcome dimension what is usually called sample size + +```{r, warning=FALSE, error=FALSE} +model = stan_model("./models/binom-laplace.stan") +SBC_iter <- 10000 +# prior hyperparameters +mu <- 1 +sigma <- 10 +mu_lst <- c() +sigma_lst <- c() +# experiment settings +## the number of dataset +nsims <- 100 +## outcome dimension for each dataset +nobs <- 100 +## posterior samples for each dataset +ndraws <- 100 +## number of binomial trials +nsize <- 1 +for (j in 1:SBC_iter){ + post_draws_theta <- c() + theta <- rnorm(nsims, mu, sigma) + for (i in 1:nsims) { + p <- invlogit(theta[i]) + y <- rbinom(nobs, nsize, p) + dat <- list(Y=as.array(y), nsize=nsize, nobs=nobs, mu = mu, sigma = sigma) + fit <- optimizing(model, data = dat, hessian = TRUE) + + # approximate posterior mean via posterior mode + post_mean_theta <- fit$par["theta"] + + # approximate posterior sd via (sqrt) of the inverse negative Hessian + post_sd_theta <- sqrt(solve(-fit$hessian)) + post_draws_theta <- c(post_draws_theta, rnorm(ndraws, post_mean_theta, post_sd_theta)) + } + + # regularizer: update hyperparameters + mu_est <- mean(post_draws_theta) + mu <- mu_est + sigma_est <- sd(post_draws_theta) + sigma <- sigma_est + + if ((j-1) %% 30 ==0){ + hist(invlogit(post_draws_theta), xlim = range(0,1), main = paste(j, "th itheration histogram")) + } + mu_lst <- c(mu_lst, mu) + sigma_lst <- c(sigma_lst, sigma) +} +plot(unlist(mu_lst), ylab = "prior mean") +plot(unlist(sigma_lst), ylab = "prior sd") +``` +## 2-3. Changing the order of the procedure. +Data-averaged posterior where average happens in outcome level (gather y from each dataset then computation once) vs parameter level (computation in each dataset then gather parameter). The former is of course more normal-like as the Laplace approximation (computation) happened lastly. Procedure change. 
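The two orderings compared in 2-3 can be written side by side; this is just a restatement of the description above, with $S$ standing for `nsims` and $y^{(s)}$ for the data generated in the $s$-th simulation:

$$
p_{\text{param}}(\theta) = \frac{1}{S}\sum_{s=1}^{S} p\left(\theta \mid y^{(s)}\right)
\qquad \text{vs.} \qquad
p_{\text{outcome}}(\theta) = p\left(\theta \mid y^{(1)}, \dots, y^{(S)}\right).
$$

The first pools draws from $S$ separate Laplace approximations; the second performs a single Laplace approximation on the concatenated outcomes, which is why its result looks more normal.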
+```{r} +model <- stan_model("models/binom-laplace.stan") +# prior hyperparameters +mu <- 0 +sigma <- 10 +# link function (1 = logit, 2 = probit, 3 = cloglog) +link <- 1 + + # maximal number of SBC iterations +niter <- 20 + +# number of SBC simulations per iteration +nsims <- 100 + +# number of draws per posterior approximation +ndraws <- 100 + +# number of observations +nobs <- 1 + +# number of binomial trials per observation +nsize <- 2 +get_posterior_1 <- function(eta, mu, sigma) { + # multiple options for link functions + if (link == 1) { + p = brms:::inv_logit(eta) + } else if (link == 2) { + p = dnorm(eta) + } else if (link == 3) { + p = brms:::inv_cloglog(eta); + } + p = brms:::inv_logit(eta) + y <- rbinom(nobs, nsize, p) + dat <- list(Y = as.array(y), nsize=nsize, nobs=nobs, + mu = mu, sigma = sigma, link = link) + + fit <- optimizing(model, data = dat, hessian = TRUE) + + # approximate posterior mean via posterior mode + post_mean_eta <- fit$par["eta"] + + # approximate posterior sd via (sqrt) of the inverse negative Hessian + post_sd_eta <- sqrt(solve(-fit$hessian)) + + # approximate the posterior using eta normal distribution + rnorm(ndraws, post_mean_eta, post_sd_eta) +} + +get_posterior_2 <- function(y, mu, sigma) { + dat <- list(Y = as.array(y), nsize=nsize, nobs=nobs*nsims, + mu = mu, sigma = sigma) + + fit <- optimizing(model, data = dat, hessian = TRUE) + + # approximate posterior mean via posterior mode + post_mean_eta <- fit$par["eta"] + + # approximate posterior sd via (sqrt) of the inverse negative Hessian + post_sd_eta <- sqrt(solve(-fit$hessian)) + + # approximate the posterior using eta normal distribution + rnorm(ndraws, post_mean_eta, post_sd_eta) +} + +# maximal number of SBC iterations +niter <- 50 +# number of SBC simulations per iteration +nsims <- 100 +# number of draws per posterior approximation +ndraws <- 100 +# number of observations +nobs <- 1 +# number of binomial trials per observation +nsize <- 2 + +# First procedure +for (j in 1:niter) { + post_draws_eta <- c() + eta <- rnorm(nsims, mu[j], sigma[j]) + for (i in 1:nsims) { + draws_new <- get_posterior_1(eta[i], mu = mu[j], sigma = sigma[j]) + post_draws_eta <- c(post_draws_eta, draws_new) + } + mu[j+1] <- mean(post_draws_eta) + sigma[j+1] <- sd(post_draws_eta) + message("mu : ", mu[j], " mu_est : ", mu[j+1]) + hist(post_draws_eta, probability = TRUE, 30, main = paste(j, "th itheration")) + xval <- seq(min(post_draws_eta), max(post_draws_eta), length.out = 100) + lines(xval, dnorm(xval, mu[j], sigma[j])) +} + +# Second procedure +for (j in 1:niter) { + draws_y <- c() + eta <- rnorm(nsims, mu[j], sigma[j]) + for (i in 1:nsims) { + p = brms:::inv_logit(eta[i]) + draws_new <- rbinom(nobs, nsize, p) + draws_y <- c(draws_y, draws_new) + } + post_draws_eta <- get_posterior_2(draws_y, mu[j], sigma[j]) + mu[j+1] <- mean(post_draws_eta) + sigma[j+1] <- sd(post_draws_eta) + message("mu : ", mu[j], " mu_est : ", mu[j+1]) + hist(post_draws_eta, probability = TRUE, 30, main = paste(j, "th itheration")) + xval <- seq(min(post_draws_eta), max(post_draws_eta), length.out = 100) + lines(xval, dnorm(xval, mu[j], sigma[j])) +} +``` + + +# ACTIVE UPDATE +Using fixed point iteration for convergence +``` {r, warning=FALSE, error=FALSE} + +data_generator <- function(eta, mu, sigma) { + # multiple options for link functions + if (link == 1) { + p = brms:::inv_logit(eta) + } else if (link == 2) { + p = dnorm(eta) + } else if (link == 3) { + p = brms:::inv_cloglog(eta); + } + y <- rbinom(nobs, nsize, p) + dat <- list(Y = 
as.array(y), nsize=nsize, nobs=nobs, + mu = mu, sigma = sigma, link = link) + dat +} + +posterior_approximator <- function(eta, mu, sigma, approximator) { + dat <- data_generator(eta, mu, sigma) + + # fit the model + neval <<- neval + 1 + if (approximator == "sampling"){ + fit <- sampling(model, data = dat, iter=1000, warmup=1000-(ndraws), cores = 1, show_messages = FALSE, chains=1, refresh=0) + return(list(posterior = extract(fit, "eta")$eta, y = dat$Y)) + }else if(approximator == "optimizing") { + fit <- optimizing(model, data = dat, hessian = TRUE) + + # approximate posterior mean via posterior mode + post_mean_eta <- fit$par["eta"] + + # approximate posterior sd via (sqrt) of the inverse negative Hessian + post_sd_eta <- sqrt(solve(-fit$hessian)) + + # approximate the posterior using eta normal distribution + return(list(posterior = rnorm(ndraws, post_mean_eta, post_sd_eta), y = dat$Y)) + } + +} + +# compute the hyperparameters of the data-averaged posterior +data_averaged_posterior_p_dat_hp <- function(mu, log_sigma, approximator = "optimizing") { + sigma <- exp(log_sigma) + post_draws_eta <- c() + draws_y <- c() + eta <- rnorm(nsims, mu, sigma) + for (i in 1:nsims) { + draws_new <- posterior_approximator(eta[i], mu, sigma, approximator) + post_draws_eta <- c(post_draws_eta, draws_new$posterior) + draws_y <- c(draws_y, draws_new$y) + } + dat_tot <- list(Y = as.array(draws_y), nsize=nsize, nobs = nobs * nsims, + mu = mu, log_sigma = log_sigma, link = link) + hist(post_draws_eta, probability = TRUE, 30) + xval <- seq(min(post_draws_eta), max(post_draws_eta), length.out = 100) + lines(xval, dnorm(xval, mu, sigma)) + + mu_est <- mean(post_draws_eta) + sigma_est <- sd(post_draws_eta) + message("mu : ", mu, " mu_est : ", mu_est) + message("sigma : ", sigma, " sigma_est : ", sigma_est) + + list(eta = eta, post_draws_eta = post_draws_eta, lambda = c(mu = mu_est, log_sigma = log(sigma_est)), dat = dat_tot) +} +loss <- function(lambda, approximator) { + dap_lambda <- data_averaged_posterior_pars_dat(lambda[1], lambda[2], approximator)$lambda + message("loss : ", sum((lambda - dap_lambda)^2)) + sum((lambda - dap_lambda)^2) +} +grad_loss <- function(lambda) { + # rough finite difference gradients such that steps are bigger + # than the expected error caused by the simulations from prior and posterior + numDeriv::grad(loss, lambda, method.args=list(eps=0.3, d = 0.3)) +} +grad_loss_sens <- function(lambda, model_sens1){ + dap_lambda_dat <- data_averaged_posterior_pars_dat(lambda[1], lambda[2], approximator) + dap_lambda <- dap_lambda_dat$lambda + dap_dat_sens <- dap_lambda_dat$dat_sens + # Run the sampler. 
+ #sampling_result <- sampling(model_sens1, data=dap_dat_sens, chains=2, iter=1000) + model_sens2 <- GetStanSensitivityModel(model_name, dap_dat_sens) + sampling_result <- optimizing(model_sens1, dap_dat_sens, draws = 100) + setClass("optimizingfit", representation(fit="list", sim="list")) + setMethod("extract", signature="optimizingfit", function(object, permute=TRUE){ + param_names <- colnames(object@fit$theta_tilde) + ret <- object@fit$theta_tilde + dim(ret) <- c(dim(object@fit$theta_tilde)[1], 1, dim(object@fit$theta_tilde)[-1]) + dimnames(ret)[[3]] <- param_names + return(ret) + }) + setMethod("get_inits", signature="optimizingfit", function(object, iter){list(split(as.numeric(object@fit$theta_tilde[iter, ]), colnames(object@fit$theta_tilde)))}) + of <- new("optimizingfit", fit=sampling_result, sim=list(warmup=0, iter=100, chains=1)) + + #sampling_result <- sampling(model_sens1, data=dap_dat_sens, chains=2, iter=1000) + #of <- sampling_result + + sens_res <- GetStanSensitivityFromModelFit(of, model_sens2) + # gradient of hyperparameter averaged over data-averaged posterior + n <- nrow(sens_res$draws_mat) + sens_mat <- (sens_res$grad_mat %*% sens_res$draws_mat) / (n - 1) + - rowMeans(sens_res$grad_mat) %*% t(colMeans(sens_res$draws_mat)) * (n / (n - 1)) + grad_self_cons <- sens_mat[,1] - 1 + return (abs(grad_self_cons)) +} +# results from solving (x-y)^2 = 0 after x +# fix points are all x = y +quad_recursion1 <- function(x, y) { + (y^2 + x^2) / (2 * y) +} +quad_recursion2 <- function(x, y) { + sqrt(2*x*y - y^2) +} +quad_recursion3 <- function(x, y) { + (y^2 + x^2) / (x + y) +} +cubic_recursion1 <- function(x, y) { + (x^3 - 3*x^2*y - y^3) / (-3*y^2) +} +cubic_recursion2 <- function(x, y) { + sqrt((x^3 + 3*x*y^2 - y^3) / (3*y)) +} +max_coupling <- function(lambda, approximator){ + eta_dapeta <- data_averaged_posterior_p_dat_hp(lambda[1], lambda[2], approximator) + #eta_dapeta <- data_averaged_posterior_pars_dat(lambda[1], lambda[2], approximator) + eta <- eta_dapeta$eta + dap_eta <- eta_dapeta$post_draws_eta + breaks <- seq(-20, 20, by = 1) + # p is categorized samples of prior + p <- hist(eta, breaks = breaks)$counts / length(eta) + # q is categorized samples of data-averaged posterior + q <- hist(dap_eta, breaks = breaks)$counts / length(dap_eta) + w = 1 - sum(abs(p - q)) + pqmin = pmin(p, q) + Z <- sum(pqmin) + # common random number generation for coupling + u <- runif(1) + if (u < w){ + tmp <- runif(pqmin / Z) + p_coup <- tmp + q_coup <- tmp + }else{ + p_coup <- runif((p - pqmin)/ 1 - Z) + q_coup <- runif((q - pqmin)/ 1 - Z) + } + p_coup +} + +# TODO: find better fix point functions +model <- stan_model("models/binom-laplace.stan") +# prior hyperparameters +mu <- -5 +sigma <- 3 +# link function (1 = logit, 2 = probit, 3 = cloglog) +link <- 1 +# maximal number of SBC iterations +niter <- 100 +# number of SBC simulations per iteration +nsims <- 20 +# number of draws per posterior approximation +ndraws <- 100 +# number of observations +nobs <- 1 +# number of binomial trials per observation +nsize <- 10 +# tolerance +tol <- 0.02 +# learning rate +gamma <- 0.5 +# approximator type +approximator = "sampling" +# type fix point method ("newton" or "heuristic") +# newton (gradient-based update does not work well at all) +fp_method <- "sensitivity" +# number of posterior approximator evaluations +neval <- 0 +loss_vec <- c() +for (j in 1:niter) { + if (fp_method == "newton") { + # use log(sigma) to have parameters on the unconstrained scale + lambda <- c(mu[j], log(sigma[j])) + loss_j <- 
loss(lambda, approximator) + grad_loss_j <- grad_loss(lambda) + # gradient descent update + lambda_new <- lambda - gamma * grad_loss_j + mu_new <- lambda_new[1] + sigma_new <- exp(lambda_new[2]) + } else if (fp_method == "heuristic") { + dap_pars <- data_averaged_posterior_pars_dat(mu[j], log(sigma[j]), approximator)$lambda + mu_est <- dap_pars["mu"] + log_sigma_est <- dap_pars["log_sigma"] + sigma_est <- exp(log_sigma_est) + mu_new <- quad_recursion3(mu_est, mu[j]) + log_sigma_new <- cubic_recursion1(log_sigma_est, log(sigma[j])) + sigma_new <- exp(log_sigma_new) + lambda <- c(mu[j], log(sigma[j])) + } else if (fp_method == "sensitivity") { + # use log(sigma) to have parameters on the unconstrained scale + lambda <- c(mu[j], log(sigma[j])) + p_coup <- max_coupling(lambda, approximator) + lambda <- c(mean(p_coup), sd(p_coup)) + model_name <- GenerateSensitivityFromModel("models/binom-laplace-sens.stan") + # compile to comform stansensitiy functions + model_sens1 <- stan_model(GetSamplingModelFilename(model_name)) + # compute gradient of E_p_lambda[g(theta)] + grad_loss_sens_j <- grad_loss_sens(lambda, model_sens1) + # gradient update + lambda_new <- lambda - c(grad_loss_sens_j["mu"], 0)#grad_loss_sens_j["log_sigma"]) + mu_new <- lambda_new[1] + sigma_new <- exp(lambda_new[2]) + } else { + stop("Invalid 'fp_method' argument.") + } + message("Iteration complete") + message("mu : ", mu[j], " mu_new : ", mu_new) + message("sigma : ", sigma[j], " sigma_new : ", sigma_new) + message("\n") + mu[j+1] <- mu_new + sigma[j+1] <- sigma_new + loss_vec[j+1] <- loss(c(mu[j+1], log(sigma[j+1])), approximator) + if (abs(mu[j] - mu_new) < tol & abs(sigma[j] - sigma_new) < tol) { + message("Stopping after ", j, " iterations using ", + neval, " evaluations of the posterior approximator.") + break + } +} +plot(mu) +plot(sigma) +plot(loss_vec) +# optim doesn't work well at the moment +# optim(c(mu, log(sigma)), loss) +``` + +## Heatmap plots + +```{R} + +mu_grid <- seq(from = -5, to = 5, by = 0.5) +offset <- exp(seq(0, 0)) +sigma_grid <- log(offset) +combinations <- expand.grid(mu = mu_grid, sigma = sigma_grid) +combinations$z <- rep(0.0, nrow(combinations)) +neval <- 0 +for(i in 1:nrow(combinations)){ + prior_mu <- combinations[i, "mu"] + prior_sd <- combinations[i, "sigma"] + eta <- rnorm(nsims, prior_mu, exp(prior_sd)) + #post_pars <- posterior_approximator(eta, mu = prior_mu, sigma = exp(prior_sd)) + post_ars <- data_averaged_posterior_pars(prior_mu, prior_sd) + #combinations[i, "z"] <- SBC::wasserstein(prior_samples, draws_new) + combinations[i, "z"] <- sqrt((prior_mu - post_ars["mu"]) ** 2 + (prior_sd - post_ars["log_sigma"]) ** 2) +} + +ggplot(combinations, aes(x=mu, y=sigma, fill=z)) + geom_tile() + scale_x_continuous(breaks=mu_grid) + scale_y_continuous(breaks=sigma_grid) + ylab("log_sigma") + ggtitle("hmc") + +mu <- -100 +sigma <- log(10) +eta <- rnorm(nsims, mu, exp(sigma)) +post_ars <- data_averaged_posterior_pars(mu, sigma) +``` + +## Fix sigma to 1 and try gridsearch of mu + +```{R} + +mu_grid <- seq(from = -5, to = 5, by = 0.5) +offset <- exp(seq(0, 0)) +sigma_grid <- log(offset) +combinations <- expand.grid(mu = mu_grid, sigma = sigma_grid) +combinations$z <- rep(0.0, nrow(combinations)) +neval <- 0 +model_name <- GenerateSensitivityFromModel("models/binom-laplace-sens.stan") +# compile to comform stansensitiy functions +model_sens1 <- stan_model(GetSamplingModelFilename(model_name)) +for(i in 1:nrow(combinations)){ + prior_mu <- combinations[i, "mu"] + prior_sd <- combinations[i, "sigma"] + eta 
<- rnorm(nsims, prior_mu, exp(prior_sd)) + #combinations[i, "z"] <- SBC::wasserstein(prior_samples, draws_new) + lambda <- c(prior_mu, prior_sd) + for(k in 1:niter){ + p_coup <- max_coupling(lambda, approximator) + lambda <- c(mean(p_coup), sd(p_coup)) + grad_loss_sens_j <- grad_loss_sens(lambda, model_sens1) + # gradient update + lambda <- lambda - c(grad_loss_sens_j["mu"], 0)#grad_loss_sens_j["log_sigma"]) + combinations[i, "z"] <- loss(lambda, "sampling") + } +} + +ggplot(combinations, aes(x=mu, y=sigma, fill=z)) + geom_tile() + scale_x_continuous(breaks=mu_grid) + scale_y_continuous(breaks=sigma_grid) + ylab("log_sigma") + ggtitle("hmc") + +mu <- -100 +sigma <- log(10) +eta <- rnorm(nsims, mu, exp(sigma)) +post_ars <- data_averaged_posterior_pars(mu, sigma) +``` + +# Maximal coupling update +```{R} +# prior hyperparameters +mu <- 0 +sigma <- 2 +# link function (1 = logit, 2 = probit, 3 = cloglog) +link <- 1 + +# maximal number of SBC iterations +niter <- 20 + +# number of SBC simulations per iteration +nsims <- 1000 + +# number of draws per posterior approximation +ndraws <- 100 + +# number of observations +nobs <- 1 + +# number of binomial trials per observation +nsize <- 2 +for (j in 1:niter) { + post_draws_eta <- c() + eta <- rnorm(nsims, mu[j], sigma[j]) + for (i in 1:nsims) { + draws_new <- get_posterior(eta[i], mu = mu[j], sigma = sigma[j]) + post_draws_eta <- c(post_draws_eta, draws_new) + } + + hist(post_draws_eta, probability = TRUE, 30) + xval <- seq(min(post_draws_eta), max(post_draws_eta), length.out = 100) + lines(xval, dnorm(xval, mu[j], sigma[j])) + + breaks <- seq(-10, 10, by = .1) + # p is categorized samples of prior + p <- hist(eta, breaks = breaks)$counts / length(eta) + # q is categorized samples of data-averaged posterior + q <- hist(post_draws_eta, breaks = breaks)$counts / length(post_draws_eta) + w = 1 - sum(abs(p - q)) + pqmin = pmin(p, q) + Z <- sum(pqmin) + # common random number generation for coupling + u <- runif(1) + if (u < w){ + tmp <- runif(pqmin / Z) + p_coup <- tmp + q_coup <- tmp + }else{ + p_coup <- runif((p - pqmin)/ 1 - Z) + q_coup <- runif((q - pqmin)/ 1 - Z) + } + # p_new is the new marginal sample for the next step prior + mu_new <- mean(p_coup) + mu[j+1] <- mu_new + message("mu : ", mu[j], " mu_est : ", mu_est, " mu_new : ", mu_new) + + sigma_new <- sd(p_coup) + sigma[j+1] <- sigma_new + message("sigma : ", sigma[j], " sigma_est : ", sigma_est, " sigma_new : ", sigma_new) +} + +plot(mu) +plot(sigma) +``` + diff --git a/vignettes/iter-binom-logistic.html b/vignettes/iter-binom-logistic.html new file mode 100644 index 0000000..29f0371 --- /dev/null +++ b/vignettes/iter-binom-logistic.html @@ -0,0 +1,341 @@ + + + + + + + + + + + + + +iter-binomial-laplace + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + + + + + + + +

    Proposition: the SBC iteration would converge to a distribution that respects the computational model.

    +

    This is due to the recurrence of well-calibrated regions, which is illustrated in Experiment 1, where different priors converge to the same distribution. We view this as a model bootstrap in which the likelihood and the inference algorithm form a transition that automatically finds the best-matching prior. Experiment 2 introduces a quantile-based hyperparameter gradient update which speeds up this convergence. Convergence is measured with the 1-Wasserstein distance (a rough sketch of such a check is given right after the examples below). Further attempts apply this calibration-boosting algorithm to other likelihood and inference-algorithm pairs that are known to have only selectively well-calibrated parameter regions. Examples are as follows:

    +

    variance parameter starting from boundary-mode distribution - likelihood: hierarchical - inference algorithm: HMC, variational inference

    +

    coefficient parameter starting from fat-tailed distribution - likelihood: logit-link generalized linear model - inference algorithm: HMC, variational inference
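As a rough illustration of the Wasserstein-based convergence check mentioned above, the 1-Wasserstein distance between prior draws and data-averaged posterior draws can be approximated from matching empirical quantiles. This is only a sketch (not code from the package); `prior_draws` and `dap_draws` are hypothetical vectors standing in for one iteration's prior sample and pooled posterior draws.

```r
# Sketch only: approximate the 1-Wasserstein distance between two samples
# by comparing matching empirical quantiles (for equal sample sizes this is
# the mean absolute difference of the sorted draws).
wasserstein1 <- function(x, y) {
  n <- min(length(x), length(y))
  probs <- (seq_len(n) - 0.5) / n
  mean(abs(quantile(x, probs, names = FALSE) - quantile(y, probs, names = FALSE)))
}

# Hypothetical usage, e.g. once per SBC iteration:
# wasserstein1(prior_draws, dap_draws)
```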

    +
    +

    Experiment 1.

    +

    The target parameter is the logit-transformed probability, \(a\). A binomial likelihood and a Laplace-approximation inference algorithm on the logit scale are used. The hyperparameters of the Laplace approximation are \(\mu, \sigma\), which correspond to the posterior mode and the curvature (second derivative) at the mode. These hyperparameter values are then set as the prior parameters for the next iteration. The results show that, starting from a \(N(0, 10^2)\) distribution, the initially non-normal distribution slowly transforms towards a normal shape to adjust to the constraint imposed by the approximating inference algorithm, in this case a normal distribution.
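For concreteness, here is a minimal pure-R sketch of the same Laplace approximation (posterior mode plus curvature-based standard deviation). It assumes the binomial-logit model described above and is only an illustration of what the Stan-based `optimizing()` calls below compute.

```r
# Sketch only: Laplace approximation of the logit-scale posterior,
# assuming y ~ Binomial(nsize, inv_logit(a)) and a ~ Normal(mu, sigma).
laplace_approx_logit <- function(y, nsize, mu, sigma) {
  # negative unnormalised log posterior of a
  nlp <- function(a) {
    -sum(dbinom(y, size = nsize, prob = plogis(a), log = TRUE)) -
      dnorm(a, mean = mu, sd = sigma, log = TRUE)
  }
  opt <- optim(mu, nlp, method = "BFGS", hessian = TRUE)
  # normal approximation: mean = posterior mode, sd from the inverse Hessian
  list(mean = opt$par, sd = sqrt(1 / opt$hessian[1, 1]))
}

# Hypothetical usage with two observations of size 2:
# laplace_approx_logit(y = c(1, 2), nsize = 2, mu = 0, sigma = 10)
```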

    +
    model = stan_model("./models/binom-laplace.stan")
    +
    ## Running /Library/Frameworks/R.framework/Resources/bin/R CMD SHLIB foo.c
    +## clang -mmacosx-version-min=10.13 -I"/Library/Frameworks/R.framework/Resources/include" -DNDEBUG   -I"/Library/Frameworks/R.framework/Versions/4.0/Resources/library/Rcpp/include/"  -I"/Library/Frameworks/R.framework/Versions/4.0/Resources/library/RcppEigen/include/"  -I"/Library/Frameworks/R.framework/Versions/4.0/Resources/library/RcppEigen/include/unsupported"  -I"/Library/Frameworks/R.framework/Versions/4.0/Resources/library/BH/include" -I"/Library/Frameworks/R.framework/Versions/4.0/Resources/library/StanHeaders/include/src/"  -I"/Library/Frameworks/R.framework/Versions/4.0/Resources/library/StanHeaders/include/"  -I"/Library/Frameworks/R.framework/Versions/4.0/Resources/library/RcppParallel/include/"  -I"/Library/Frameworks/R.framework/Versions/4.0/Resources/library/rstan/include" -DEIGEN_NO_DEBUG  -DBOOST_DISABLE_ASSERTS  -DBOOST_PENDING_INTEGER_LOG2_HPP  -DSTAN_THREADS  -DBOOST_NO_AUTO_PTR  -include '/Library/Frameworks/R.framework/Versions/4.0/Resources/library/StanHeaders/include/stan/math/prim/mat/fun/Eigen.hpp'  -D_REENTRANT -DRCPP_PARALLEL_USE_TBB=1   -I/usr/local/include   -fPIC  -Wall -g -O2  -c foo.c -o foo.o
    +## In file included from <built-in>:1:
    +## In file included from /Library/Frameworks/R.framework/Versions/4.0/Resources/library/StanHeaders/include/stan/math/prim/mat/fun/Eigen.hpp:13:
    +## In file included from /Library/Frameworks/R.framework/Versions/4.0/Resources/library/RcppEigen/include/Eigen/Dense:1:
    +## In file included from /Library/Frameworks/R.framework/Versions/4.0/Resources/library/RcppEigen/include/Eigen/Core:88:
    +## /Library/Frameworks/R.framework/Versions/4.0/Resources/library/RcppEigen/include/Eigen/src/Core/util/Macros.h:628:1: error: unknown type name 'namespace'
    +## namespace Eigen {
    +## ^
    +## /Library/Frameworks/R.framework/Versions/4.0/Resources/library/RcppEigen/include/Eigen/src/Core/util/Macros.h:628:16: error: expected ';' after top level declarator
    +## namespace Eigen {
    +##                ^
    +##                ;
    +## In file included from <built-in>:1:
    +## In file included from /Library/Frameworks/R.framework/Versions/4.0/Resources/library/StanHeaders/include/stan/math/prim/mat/fun/Eigen.hpp:13:
    +## In file included from /Library/Frameworks/R.framework/Versions/4.0/Resources/library/RcppEigen/include/Eigen/Dense:1:
    +## /Library/Frameworks/R.framework/Versions/4.0/Resources/library/RcppEigen/include/Eigen/Core:96:10: fatal error: 'complex' file not found
    +## #include <complex>
    +##          ^~~~~~~~~
    +## 3 errors generated.
    +## make: *** [foo.o] Error 1
    +
    SBC_iter <- 91
    +# prior hyperparameters
    +mu <- 0
    +sigma <- 10
    +mu_lst <- list()
    +sigma_lst <- list()
    +# the number of dataset
    +nsims <- 91
    +# outcome dimension for each dataset
    +nobs <- 2
    +# posterior samples for each dataset
    +ndraws <- 10
    +# number of binomial trials
    +nsize <- 2
    +for (j in 1:SBC_iter){
    +  post_draws_a <- c()
    +  a <- rnorm(nsims, mu, sigma)
    +  for (i in 1:nsims) {
    +    p <- invlogit(a[i])
    +    y <- rbinom(nobs, nsize, p)
    +    dat <- list(Y=as.array(y), nsize=nsize, nobs=nobs, mu = mu, sigma = sigma)
    +    fit <- optimizing(model, data = dat, hessian = TRUE)
    +    
    +    # approximate posterior mean via posterior mode
    +    post_mean_a <- fit$par["a"]
    +    
    +    # approximate posterior sd via (sqrt) of the inverse negative Hessian
    +    post_sd_a <- sqrt(solve(-fit$hessian))
    +    post_draws_a <- c(post_draws_a, rnorm(ndraws, post_mean_a, post_sd_a))
    +  }
    +  if ((j-1) %% 30 ==0){
    +    hist(invlogit(post_draws_a), xlim = range(0,1), main = paste(j, "th iteration histogram"))
    +  }
    +  
    +  # update hyperparameters depending on inference algorithm
    +  mu_est <- mean(post_draws_a)
    +  mu <- mu_est
    +  sigma_est <- sd(post_draws_a)
    +  sigma <- sigma_est
    +  # compare with previous hyperparameters
    +  mu_lst <- c(mu_lst, mu)
    +  sigma_lst <- c(sigma_lst, sigma)
    +}
    +

    +
    plot(unlist(mu_lst))
    +

    +
    plot(unlist(sigma_lst))
    +

    +

    The second experiment, with a different prior \(N(1, 1^2)\) that is not centred at zero unlike \(N(0, 10^2)\), also converges to the same distribution. A plausible explanation is the recurrence of parameter values within the well-calibrated region, as opposed to those outside it. For instance, a parameter starting at \(logit^{-1}(a) = 0.9\) is likely to produce a non-normal posterior whose mode tends to move away from 0.9. On the other hand, parameter values near 0.5 produce a symmetric and stable posterior whose mode in most cases stays near 0.5. A crude numerical sketch of this argument follows.
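The sketch below makes the following assumptions (none of which are part of the experiment's code): two binomial observations of size 2, a wide \(N(0, 10^2)\) prior, and a grid-search MAP in place of the Stan model.

```r
# Sketch only: grid-search MAP of the logit-scale parameter under a wide prior
map_logit <- function(y, nsize, mu, sigma) {
  grid <- seq(-10, 10, by = 0.01)
  lp <- dnorm(grid, mu, sigma, log = TRUE) +
    sapply(grid, function(a) sum(dbinom(y, nsize, plogis(a), log = TRUE)))
  grid[which.max(lp)]
}

set.seed(1)
# simulated posterior modes when the true parameter sits at 0 (p = 0.5)
# versus at 2.2 (p ~ 0.9)
modes_mid <- replicate(200, map_logit(rbinom(2, 2, plogis(0)),   2, 0, 10))
modes_ext <- replicate(200, map_logit(rbinom(2, 2, plogis(2.2)), 2, 0, 10))
c(mid = mean(modes_mid), extreme = mean(modes_ext))
# in this sketch the modes for the central draw average roughly to 0,
# while for the extreme draw they tend to land beyond 2.2
```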

    +
    +
    +

    Experiment 2.

    +
    # change prior hyperparameters
    +mu <- 1
    +sigma <- 1
    +SBC_iter <- 91
    +mu_lst <- list()
    +sigma_lst <- list()
    +for (j in 1:SBC_iter){
    +  post_draws_a <- c()
    +  a <- rnorm(nsims, mu, sigma)
    +  for (i in 1:nsims) {
    +    p <- invlogit(a[i])
    +    y <- rbinom(nobs, nsize, p)
    +    dat <- list(Y=as.array(y), nsize=nsize, nobs=nobs, mu = mu, sigma = sigma)
    +    fit <- optimizing(model, data = dat, hessian = TRUE)
    +    
    +    # approximate posterior mean via posterior mode
    +    post_mean_a <- fit$par["a"]
    +    
    +    # approximate posterior sd via (sqrt) of the inverse negative Hessian
    +    post_sd_a <- sqrt(solve(-fit$hessian))
    +    post_draws_a <- c(post_draws_a, rnorm(ndraws, post_mean_a, post_sd_a))
    +  }
    +  if ((j-1) %% 30 == 0){
    +    hist(invlogit(post_draws_a), xlim = range(0,1), main = paste(j, "th iteration histogram"))
    +  }
    +  
    +  # update hyperparameters depending on inference algorithm
    +  mu_est <- mean(post_draws_a)
    +  mu <- mu_est
    +  sigma_est <- sd(post_draws_a)
    +  sigma <- sigma_est
    +  # compare with previous hyperparameters
    +
    +  mu_lst <- c(mu_lst, mu)
    +  sigma_lst <- c(sigma_lst, sigma)
    +}
    +

    +
    plot(unlist(mu_lst))
    +

    +
    plot(unlist(sigma_lst))
    +

    +
    + + + + +
    + + + + + + + + + + + + + + + diff --git a/vignettes/iter-cloglogBern-laplace.Rmd b/vignettes/iter-cloglogBern-laplace.Rmd new file mode 100644 index 0000000..c94c823 --- /dev/null +++ b/vignettes/iter-cloglogBern-laplace.Rmd @@ -0,0 +1,176 @@ +--- +title: "iter-cloglogBern-laplace" +output: html_document +--- + +# Experiment 3. Likelihood and prior distribution family + +Experiment 3-1 changes $p(y|\theta)$ and show a very degenerate type of convergence. Experiment 3-2 alleviate this by selecting a more expressive prior distribution family. + +## Experiment 3-1.inverse c-log-log which is asymmetric +Contrary to inverse logit, asymmetrical inverse complementary log-log prior predictive converges but to a rather degenerate one. This is expected given that there is no symmetry point in the middle that emits a fixed point. + +```{r, warning=FALSE, error=FALSE} +model = stan_model("./models/binom-laplace.stan") +SBC_iter <- 1001 +# prior hyperparameters +mu <- 1 +sigma <- 10 +mu_lst <- c() +sigma_lst <- c() +# experiment settings +## the number of dataset +nsims <- 100 +## outcome dimension for each dataset +nobs <- 2 +## posterior samples for each dataset +ndraws <- 100 +## number of binomial trials +nsize <- 1 + +for (j in 1:SBC_iter){ + post_draws_theta <- c() + theta <- rnorm(nsims, mu, sigma) + for (i in 1:nsims) { + p <- invlogit(theta[i]) + y <- rbinom(nobs, nsize, p) + dat <- list(Y=as.array(y), nsize=nsize, nobs=nobs, mu = mu, sigma = sigma) + fit <- optimizing(model, data = dat, hessian = TRUE) + + # approximate posterior mean via posterior mode + post_mean_theta <- fit$par["theta"] + + # approximate posterior sd via (sqrt) of the inverse negative Hessian + post_sd_theta <- sqrt(solve(-fit$hessian)) + post_draws_theta <- c(post_draws_theta, rnorm(ndraws, post_mean_theta, post_sd_theta)) + } + + # regularizer: update hyperparameters + mu_est <- mean(post_draws_theta) + mu <- mu_est + sigma_est <- sd(post_draws_theta) + sigma <- sigma_est + + if ((j-1) %% 30 ==0){ + hist(invlogit(post_draws_theta), xlim = range(0,1), main = paste(j, "th itheration histogram")) + } + mu_lst <- c(mu_lst, mu) + sigma_lst <- c(sigma_lst, sigma) +} +plot(unlist(mu_lst), ylab = "prior mean") +plot(unlist(sigma_lst), ylab = "prior sd") +``` + +## Experiment 3-2. mixture normal family +Contrary to inverse logit, asymmetrical inverse complementary log-log prior predictive converges but to a rather degenerate one. This is expected given that there is no symmetry point in the middle that emits a fixed point. 
+ +```{r, warning=FALSE, error=FALSE} +#model = stan_model("./models/binom-laplace.stan") +SBC_iter <- 1000 +# prior hyperparameters +mu <- 3 +sigma <- 1 +mu_lst <- c() +sigma_lst <- c() +mu_hat_lst <- c() +# the number of dataset +nsims <- 100 +# outcome dimension for each dataset +nobs <- 10 +# posterior samples for each dataset +ndraws <- 100 +# number of binomial trials +nsize <- 1 +mu_dist <- rnorm(nsims, mu, sigma) +sigma_dist <- rnorm(nsims, sigma, sigma * 0.1) +for (j in 1:SBC_iter){ + post_draws_theta <- c() + theta <- rnorm(nsims, mu, sigma) + for (i in 1:nsims) { + p <- invlogit(theta[i]) + y <- rbinom(nobs, nsize, p) + dat <- list(Y=as.array(y), nsize=nsize, nobs=nobs, mu = mu, sigma = sigma) + fit <- optimizing(model, data = dat, hessian = TRUE) + + # approximate posterior mean via posterior mode + post_mean_theta <- fit$par["theta"] + + # approximate posterior sd via (sqrt) of the inverse negative Hessian + post_sd_theta <- sqrt(solve(-fit$hessian)) + + mu_hat_lst <- c(mu_hat_lst, post_mean_theta) + post_draws_theta <- c(post_draws_theta, rnorm(ndraws, post_mean_theta, post_sd_theta)) + } + + # regularizer: update hyperparameters + mu_dist <- update_quantile_approximation(mu_dist, mu_hat_lst, nsims, 1000, 0.001) + mu <- mu_est + sigma_est <- sd(post_draws_theta) + sigma <- sigma_est + + if ((j-1) %% 30 ==0){ + hist(invlogit(post_draws_theta), xlim = range(0,1), main = paste(j, "th itheration histogram")) + } + mu_lst <- c(mu_lst, mu) + sigma_lst <- c(sigma_lst, sigma) +} +plot(unlist(mu_lst), ylab = "prior mean") +plot(unlist(sigma_lst), ylab = "prior sd") +``` + +# Experiment 4.-2. Change 1sc-log-log, less symmetric prior predictive distribution +Contrary to inverse logit, asymmetrical inverse complementary log-log prior predictive converges but to a rather degenerate one. This is expected given that there is no symmetry point in the middle that emits a fixed point. 
+ +```{r, warning=FALSE, error=FALSE} +#model = stan_model("./models/binom-laplace.stan") +SBC_iter <- 1000 +# prior hyperparameters +mu <- 3 +sigma <- 1 +mu_lst <- c() +sigma_lst <- c() +mu_hat_lst <- c() +# the number of dataset +nsims <- 100 +# outcome dimension for each dataset +nobs <- 10 +# posterior samples for each dataset +ndraws <- 100 +# number of binomial trials +nsize <- 1 +mu_dist <- rnorm(nsims, mu, sigma) +sigma_dist <- rnorm(nsims, sigma, sigma * 0.1) +for (j in 1:SBC_iter){ + post_draws_theta <- c() + theta <- rnorm(nsims, mu, sigma) + for (i in 1:nsims) { + p <- invlogit(theta[i]) + y <- rbinom(nobs, nsize, p) + dat <- list(Y=as.array(y), nsize=nsize, nobs=nobs, mu = mu, sigma = sigma) + fit <- optimizing(model, data = dat, hessian = TRUE) + + # approximate posterior mean via posterior mode + post_mean_theta <- fit$par["theta"] + + # approximate posterior sd via (sqrt) of the inverse negative Hessian + post_sd_theta <- sqrt(solve(-fit$hessian)) + + mu_hat_lst <- c(mu_hat_lst, post_mean_theta) + post_draws_theta <- c(post_draws_theta, rnorm(ndraws, post_mean_theta, post_sd_theta)) + } + + # regularizer: update hyperparameters + mu_dist <- update_quantile_approximation(mu_dist, mu_hat_lst, nsims, 1000, 0.001) + mu <- mu_est + sigma_est <- sd(post_draws_theta) + sigma <- sigma_est + + if ((j-1) %% 30 ==0){ + hist(invlogit(post_draws_theta), xlim = range(0,1), main = paste(j, "th itheration histogram")) + } + mu_lst <- c(mu_lst, mu) + sigma_lst <- c(sigma_lst, sigma) +} +plot(unlist(mu_lst), ylab = "prior mean") +plot(unlist(sigma_lst), ylab = "prior sd") +``` diff --git a/vignettes/iter_overview.png b/vignettes/iter_overview.png new file mode 100644 index 0000000..742749a Binary files /dev/null and b/vignettes/iter_overview.png differ diff --git a/vignettes/limits_of_SBC.Rmd b/vignettes/limits_of_SBC.Rmd index 869a8a8..e4ee66b 100644 --- a/vignettes/limits_of_SBC.Rmd +++ b/vignettes/limits_of_SBC.Rmd @@ -20,12 +20,13 @@ actual algorithms. ```{r setup, message=FALSE,warning=FALSE, results="hide"} library(SBC) library(ggplot2) -use_cmdstanr <- TRUE # Set to false to use rstan instead +use_cmdstanr <- getOption("SBC.vignettes_cmdstanr", TRUE) # Set to false to use rstan instead if(use_cmdstanr) { library(cmdstanr) } else { library(rstan) + rstan_options(auto_write = TRUE) } options(mc.cores = parallel::detectCores()) @@ -39,7 +40,11 @@ plan(multisession) options(SBC.min_chunk_size = 5) # Setup caching of results -cache_dir <- "./limits_SBC_cache" +if(use_cmdstanr) { + cache_dir <- "./_limits_SBC_cache" +} else { + cache_dir <- "./_limits_rstan_SBC_cache" +} if(!dir.exists(cache_dir)) { dir.create(cache_dir) } @@ -98,11 +103,11 @@ if(use_cmdstanr) { } ``` -And here we simulate from a student's t distribution. We scale the distribution so that the `sigma` parameter +And here we simulate from a student's t distribution. We scale the distribution so that `sigma` is the standard deviation of the distribution. 
```{r} -single_dataset_minor <- function(N) { +single_sim_minor <- function(N) { mu <- rnorm(n = 1, mean = 0, sd = 1) sigma <- abs(rnorm(n = 1, mean = 0, sd = 1)) nu <- 5 @@ -110,20 +115,20 @@ single_dataset_minor <- function(N) { y <- mu + student_scale * rt(N, df = nu) list( - parameters = list(mu = mu, sigma = sigma), + variables = list(mu = mu, sigma = sigma), generated = list(N = N, y = y) ) } set.seed(51336848) -generator_minor <- SBC_generator_function(single_dataset_minor, N = 10) -datasets_minor <- generate_datasets(generator_minor, n_datasets = 200) +generator_minor <- SBC_generator_function(single_sim_minor, N = 10) +datasets_minor <- generate_datasets(generator_minor, n_sims = 200) ``` -Can we see something by looking at the results of just the first 10 datasets? (note that `SBC_datasets` objects support subsetting). +Can we see something by looking at the results of just the first 10 simulations? (note that `SBC_datasets` objects support subsetting). ```{r} -results_minor_10 <- compute_results(datasets_minor[1:10], backend_minor, +results_minor_10 <- compute_SBC(datasets_minor[1:10], backend_minor, cache_mode = "results", cache_location = file.path(cache_dir, "minor_10")) ``` @@ -135,19 +140,19 @@ plot_rank_hist(results_minor_10) plot_ecdf_diff(results_minor_10) ``` -Will we have better luck with 100 datasets? (Note that we can use `bind_results` to combine multiple results, -letting us start small, but not throw away the computation spent for the initial SBC runs) +Will we have better luck with 100 simulations? (Note that we can use `bind_results` to combine multiple results, +letting us start small, but not throw away the computation spent for the initial simulations) ```{r} results_minor_100 <- bind_results( results_minor_10, - compute_results(datasets_minor[11:100], backend_minor, + compute_SBC(datasets_minor[11:100], backend_minor, cache_mode = "results", cache_location = file.path(cache_dir, "minor_90")) ) ``` -Here we see something suspicios with the `sigma` parameter, but it is not very convincing. +Here we see something suspicios with the `sigma` variable, but it is not very convincing. ```{r results_minor_100_plots} plot_rank_hist(results_minor_100) @@ -159,7 +164,7 @@ So let's do additional 100 SBC steps ```{r} results_minor_200 <- bind_results( results_minor_100, - compute_results(datasets_minor[101:200], backend_minor, + compute_SBC(datasets_minor[101:200], backend_minor, cache_mode = "results", cache_location = file.path(cache_dir, "minor_next_100")) ) @@ -194,7 +199,7 @@ coverage <- empirical_coverage(results_minor_200$stats, width = c(0.5,0.9,0.95)) coverage sigma_90_coverage_string <- paste0(round(100 * as.numeric( - coverage[coverage$parameter == "sigma" & coverage$width == 0.9, c("ci_low","ci_high")])), + coverage[coverage$variable == "sigma" & coverage$width == 0.9, c("ci_low","ci_high")])), "%", collapse = " - ") ``` @@ -215,19 +220,19 @@ SBC will not notice if you completely omit likelihood from your Stan model! 
Here we have a generator for a very simple model with gaussian likelihood: ```{r} -single_dataset_missing <- function(N) { +single_sim_missing <- function(N) { mu <- rnorm(n = 1, mean = 0, sd = 1) y <- rnorm(n = N, mean = mu, sd = 1) list( - parameters = list(mu = mu), + variables = list(mu = mu), generated = list(N = N, y = y) ) } set.seed(25746223) -generator_missing <- SBC_generator_function(single_dataset_missing, N = 10) -datasets_missing <- generate_datasets(generator_missing, n_datasets = 200) +generator_missing <- SBC_generator_function(single_sim_missing, N = 10) +datasets_missing <- generate_datasets(generator_missing, n_sims = 200) ``` @@ -258,10 +263,10 @@ if(use_cmdstanr) { ``` -Now we'll compute the results for 200 simulated datasets: +Now we'll compute the results for 200 simulations: ```{r} -results_missing <- compute_results(datasets_missing, backend_missing, +results_missing <- compute_SBC(datasets_missing, backend_missing, cache_mode = "results", cache_location = file.path(cache_dir, "missing")) ``` @@ -278,7 +283,7 @@ It's just nothing out of the ordinary. But we are not completely helpless: This specific type of problem can be noticed by prior/posterior contraction plot. In this plot we compare the prior and posterior standard deviation to get a measure -of how much more we know about the parameter after fitting the model. +of how much more we know about the variable after fitting the model. For this model, we can get the prior sd directly, but one can also use a (preferably large) `SBC_datasets` object to estimate it empirically for more complex models. @@ -301,7 +306,7 @@ plot_sim_estimated(results_missing, alpha = 0.5) There is however even more powerful method - and that is to include the likelihood in the SBC. This is most easily done by adding a "generated quantity" to the SBC results - this is a function -that is evaluated within the context of the parameters AND data. +that is evaluated within the context of the variables AND data. And it can be added without recomputing the fits! ```{r} @@ -312,7 +317,7 @@ normal_lpdf <- function(y, mu, sigma) { log_lik_gq <- generated_quantities(log_lik = normal_lpdf(y, mu, 1), .globals = "normal_lpdf" ) -results_missing_gq <- recompute_statistics( +results_missing_gq <- recompute_SBC_statistics( results_missing, datasets_missing, backend = backend_missing, gen_quants = log_lik_gq) ``` @@ -350,10 +355,10 @@ if(use_cmdstanr) { ``` -Let us use this model for the same dataset. +Let us use this model for the same set of simulations. ```{r} -results_missing_2 <- compute_results(datasets_missing, backend_missing_2, gen_quants = log_lik_gq, +results_missing_2 <- compute_SBC(datasets_missing, backend_missing_2, gen_quants = log_lik_gq, cache_mode = "results", cache_location = file.path(cache_dir, "missing_2")) ``` @@ -361,13 +366,13 @@ results_missing_2 <- compute_results(datasets_missing, backend_missing_2, gen_qu The contraction plot would not show anything suspicious - we get decent contraction ```{r results_missing_2_contraction} -plot_contraction(results_missing_2, prior_sd, parameters = "mu") +plot_contraction(results_missing_2, prior_sd, variables = "mu") ``` Similarly, our posterior estimates now cluster around the true values. 
```{r results_missing_2_sim_estimated} -plot_sim_estimated(results_missing_2, parameters = "mu", alpha = 0.5) +plot_sim_estimated(results_missing_2, variables = "mu", alpha = 0.5) ``` diff --git a/vignettes/logit-probit-iter.Rmd b/vignettes/logit-probit-iter.Rmd new file mode 100644 index 0000000..4aaa7e5 --- /dev/null +++ b/vignettes/logit-probit-iter.Rmd @@ -0,0 +1,131 @@ +--- +title: "logit-probit-iter" +output: html_document +--- + +--- +title: "iter-binomial-laplace" +output: html_document +--- +```{r setup, include=FALSE, warning=FALSE} +library(SBC) +library(cmdstanr) +library(parallel) +library(bayesplot) +library(posterior) +library(dplyr) +library(rstan) +library(future) +library(ggpubr) +library(mclust) +library(rstanarm) +library(ggplot2) +library(NormalLaplace) +options(mc.cores = parallel::detectCores()) +plan(multisession) +options(SBC.min_chunk_size = 5) +set.seed(1984) +``` + +#mean, sd of the whole sample +```{r setup, include=FALSE} +model = stan_model("./models/binom-laplace.stan") +SBC_iter <- 101 +# prior hyperparameters +mu <- 0 +sigma <- 10 +mu_lst <- list() +sigma_lst <- list() +# the number of dataset +nsims <- 100 +# outcome dimension for each dataset +nobs <- 1000 +# posterior samples for each dataset +ndraws <- 10 +# number of binomial trials +nsize <- 1 +for (j in 1:SBC_iter){ + post_draws_theta <- c() + theta <- rnorm(nsims, mu, sigma) + # compute ecdf + ys <- matrix(nrow = nsims, ncol = nobs) + for (s in 1:nsims){ + ys[s,] <- as.numeric(runif(nobs) < invlogit(theta[s])) + } + # F(G^{-1}(U)) = U; view parameter and data as one + post_draws_theta <- as.numeric(ys) + # update hyperparameter + mu_est <- mean(post_draws_theta) + mu <- mu_est + sd_est <- sd(post_draws_theta) + sd <- sd_est + message("mu : ", round(mu, 2), ", mu_est : ", round(mu_est, 2), ", sigma : ", round(sigma, 2), ", sigma_est : ", round(sigma_est, 2)) + # if the above doesn't work, do optimize with the whole data +# fit <- optimizing(model, data = dat, hessian = TRUE) +# # approximate posterior mean via posterior mode +# post_mean_a <- fit$par["a"] +# # approximate posterior sd via (sqrt) of the inverse negative Hessian +# post_sd_a <- sqrt(solve(-fit$hessian)) +# post_draws_a <- c(post_draws_a, rnorm(ndraws, post_mean_a, post_sd_a)) +# sigma_est <- sd(post_draws_a) +# sigma <- sigma_est + #if ((j-1) %% 30 ==0){ + hist(invlogit(post_draws_theta), xlim = range(0,1), main = paste(j, "th itheration histogram")) + #} + + # compare with previous hyperparameters + mu_lst <- c(mu_lst, mu) + sigma_lst <- c(sigma_lst, sigma) +} +plot(unlist(mu_lst), ylab = "prior mean") +plot(unlist(sigma_lst), ylab = "prior sd") +``` +#mode of each dataset +```{r setup, include=FALSE} +model = stan_model("./models/binom-laplace.stan") +SBC_iter <- 101 +# prior hyperparameters +mu <- 0.1 +sigma <- 1 +mu_lst <- list() +sigma_lst <- list() +# the number of dataset +nsims <- 100 +# outcome dimension for each dataset +nobs <- 1000 +# posterior samples for each dataset +ndraws <- 100 +# number of binomial trials +nsize <- 1 +for (j in 1:SBC_iter){ + post_draws_theta <- c() + theta <- rnorm(nsims, mu, sigma) + # compute ecdf + ys <- matrix(nrow = nsims, ncol = nobs) + for (s in 1:nsims){ + ys[s,] <- as.numeric(runif(nobs) < invlogit(theta[s])) + dat <- list(Y=as.array(ys[s,]), nsize=nsize, nobs=nobs, mu = mu, sigma = sigma) + fit <- optimizing(model, data = dat, hessian = TRUE) + # approximate posterior mean via posterior mode + post_mean_theta <- fit$par["a"] + # approximate posterior sd via (sqrt) of the inverse negative 
Hessian + post_sd_theta <- sqrt(solve(-fit$hessian)) + post_draws_theta <- c(post_draws_theta, rnorm(ndraws, post_mean_a, post_sd_a)) + hist(post_draws_theta) + } + hist(post_draws_theta) + # update hyperparameters depending on inference algorithm + mu_est <- mean(post_draws_theta) + mu <- mu_est + sigma_est <- sd(post_draws_theta) + sigma <- sigma_est + message("mu : ", round(mu, 2), ", mu_est : ", round(mu_est, 2), ", sigma : ", round(sigma, 2), ", sigma_est : ", round(sigma_est, 2)) + if ((j-1) %% 30 ==0){ + hist(invlogit(post_draws_theta), xlim = range(0,1), main = paste(j, "th itheration histogram")) + } + mu_lst <- c(mu_lst, mu) + sigma_lst <- c(sigma_lst, sigma) +} +plot(unlist(mu_lst), ylab = "prior mean") +plot(unlist(sigma_lst), ylab = "prior sd") +``` diff --git a/vignettes/models/binom-laplace.stan b/vignettes/models/binom-laplace.stan new file mode 100644 index 0000000..d2b4150 --- /dev/null +++ b/vignettes/models/binom-laplace.stan @@ -0,0 +1,39 @@ +data { + int nobs; // total number of observations + int Y[nobs]; // outcome + int nsize; + int link; + int dist_types; + real lambda_arg1; + real lambda_arg2; + +} +parameters { + real eta; +} +transformed parameters { + // lambda_multiple options for link functions + real p; + if (link == 1) { + p = inv_logit(eta); + } else if (link == 2) { + p = Phi(eta); + } else if (link == 3) { + p = inv_cloglog(eta); + } +} +model { + if(dist_types == 1){ + #eta ~ normal(lambda_arg1, lambda_arg2); + eta ~ normal(lambda_arg1, lambda_arg2); + } + else if(dist_types == 2){ + eta ~ gamma(lambda_arg1, lambda_arg2); + } + else{ + reject("non-supportive distribution type"); + } + + + Y ~ binomial(nsize, p); +} diff --git a/vignettes/models/binom-laplace_gmm.stan b/vignettes/models/binom-laplace_gmm.stan new file mode 100644 index 0000000..6038aff --- /dev/null +++ b/vignettes/models/binom-laplace_gmm.stan @@ -0,0 +1,33 @@ +data { + int nobs; // total number of observations + int Y [nobs]; // outcome variable + // fixed arguments for distribution + int nsims; // next simulation number + int nsize; // total simulation number + // updating arguments for approximation + vector[nsims] lambda_mu; // mean values for gmm prior + real lambda_log_sigma; // bandwidth(sd) for gmm prior + int link; +} + +parameters { + real eta; // population-level effects +} + +transformed parameters { + // lambda_multiple options for link functions + real p; + if (link == 1) { + p = inv_logit(eta); + } else if (link == 2) { + p = Phi(eta); + } else if (link == 3) { + p = inv_cloglog(eta); + } +} + +model { + target += binomial_logit_lpmf(Y | nsize, eta); + // priors including constants + target += normal_lpdf(eta | lambda_mu, exp(lambda_log_sigma)) - log(nsims); +} diff --git a/vignettes/models/eightschools_cp_calib.stan b/vignettes/models/eightschools_cp_calib.stan new file mode 100644 index 0000000..0545d53 --- /dev/null +++ b/vignettes/models/eightschools_cp_calib.stan @@ -0,0 +1,21 @@ +data { + int J; // number of schools + real y[J]; // estimated treatment + real sigma[J]; // std of estimated effect + + int nsims; + vector[nsims] mm_mean; + real mm_bandwidth; +} +parameters { + real theta[J]; // treatment effect in school j + real mu; // hyper-parameter of mean + real tau; // hyper-parameter of sdv +} +model { + #tau ~ cauchy(0, 5); // a non-informative prior + target += normal_lpdf(tau | mm_mean, mm_bandwidth) - log(nsims); + theta ~ normal(mu, tau); + y ~ normal(theta, sigma); + mu ~ normal(0, 5); +} diff --git a/vignettes/models/eightschools_cp_posteriordb.stan 
b/vignettes/models/eightschools_cp_posteriordb.stan new file mode 100644 index 0000000..18a1a72 --- /dev/null +++ b/vignettes/models/eightschools_cp_posteriordb.stan @@ -0,0 +1,32 @@ +data { + int J; // number of schools + real y[J]; // estimated treatment + real sigma[J]; // std of estimated effect + real lambda_mu; + real lambda_log_var; +} + +transformed data { + real lambda_sigma = sqrt(exp(lambda_log_var)); +} + +parameters { + vector[J] theta_trans; + real mu; // hyper-parameter of mean + real log_tau; +} + +transformed parameters { + real tau = exp(log_tau); + vector[J] theta; + theta = theta_trans * tau + mu; +} + +model { + theta_trans ~ normal(0, 1); + //tau ~ cauchy(0, 5); + log_tau ~ normal(lambda_mu, lambda_sigma); + y ~ normal(theta, sigma); + mu ~ normal(0, 5); + target += log_tau; // jacobian adjustment +} diff --git a/vignettes/models/eightschools_ncp_posteriordb.stan b/vignettes/models/eightschools_ncp_posteriordb.stan new file mode 100644 index 0000000..4f05ce6 --- /dev/null +++ b/vignettes/models/eightschools_ncp_posteriordb.stan @@ -0,0 +1,33 @@ +data { + int J; // number of schools + real y[J]; // estimated treatment + real sigma[J]; // std of estimated effect + real lambda_mu; + real lambda_var; +} + +transformed data { + real lambda_sigma = sqrt(lambda_var); +} + +parameters { + vector[J] theta_trans; + real mu; // hyper-parameter of mean + //real log_tau; + real tau; +} + +transformed parameters { + //real tau = exp(log_tau); + vector[J] theta; + theta = theta_trans * tau + mu; +} + +model { + theta_trans ~ normal(0, 1); + //tau ~ cauchy(0, 5); + tau ~ normal(lambda_mu, lambda_sigma); + //log_tau ~ normal(lambda_mu, lambda_sigma); + y ~ normal(theta, sigma); + mu ~ normal(0, 5); +} diff --git a/vignettes/models/gamma-reg.stan b/vignettes/models/gamma-reg.stan new file mode 100644 index 0000000..b38734d --- /dev/null +++ b/vignettes/models/gamma-reg.stan @@ -0,0 +1,73 @@ +data { + int nobs; // total number of observations + vector[nobs] Y; // response variable + int K; // number of population-level effects + matrix[nobs, K] X; // population-level design matrix + //real shape; // shape parameter + int dist_types[2]; + real lambda_arg1[2]; + real lambda_arg2[2]; + + + +} +parameters { + real a; // population-level effects + vector[K] b; + real shape; +} + +model { + // initialize linear predictor term + vector[nobs] logmu = a + X * b; + vector[nobs] mu; + for (n in 1:nobs) { + // apply the inverse link function + mu[n] = exp(logmu[n]); + } + + for(n in 1:nobs){ + target += gamma_lpdf(Y[n] | shape, shape / mu[n]); + } + + // priors including constants + //target += gamma_lpdf(shape | lambda_alpha, lambda_beta); + //target += normal_lpdf(a | 2, 5); + target += normal_lpdf(b[1] | 0, 1); + target += normal_lpdf(b[2] | 0, 1); + target += normal_lpdf(b[3] | 0, 1); + target += normal_lpdf(b[4] | 0, 1); + target += normal_lpdf(b[5] | 0, 1); + target += normal_lpdf(b[6] | 0, 1); + target += normal_lpdf(b[7] | 0, 1); + target += normal_lpdf(b[8] | 0, 1); + target += normal_lpdf(b[9] | 0, 1); + target += normal_lpdf(b[10] | 0, 1); + target += normal_lpdf(b[11] | 0, 1); + target += normal_lpdf(b[12] | 0, 1); + target += normal_lpdf(b[13] | 0, 1); + target += normal_lpdf(b[14] | 0, 1); + target += normal_lpdf(b[15] | 0, 1); + + + if (dist_types[1] == 1){ // shape + target += normal_lpdf(shape | lambda_arg1[1], lambda_arg2[1]); + } + else if(dist_types[1] == 2){ + target += gamma_lpdf(shape | lambda_arg1[1], lambda_arg2[1]); + } + else if(dist_types[1] == 3){ + target += 
lognormal_lpdf(shape | lambda_arg1[1], lambda_arg2[1]); + } + + + if (dist_types[2] == 1){ // a + target += normal_lpdf(a | lambda_arg1[2], lambda_arg2[2]); + } + else if(dist_types[2] == 2){ + target += gamma_lpdf(a | lambda_arg1[2], lambda_arg2[2]); + } + else if(dist_types[2] == 3){ + target += lognormal_lpdf(a | lambda_arg1[2], lambda_arg2[2]); + } +} diff --git a/vignettes/models/gamma-reg_gmm.stan b/vignettes/models/gamma-reg_gmm.stan new file mode 100644 index 0000000..3ba9ab6 --- /dev/null +++ b/vignettes/models/gamma-reg_gmm.stan @@ -0,0 +1,44 @@ + +data { + int nobs; // total number of observations + vector[nobs] Y; // response variable + int npredictors; // number of population-level effects + matrix[nobs, npredictors] X; // population-level design matrix + real shape; // shape parameter + int nsims; // next simulation number + vector[nsims] mm_mean; // mean values for gmm prior + real mm_bandwidth; // bandwidth(sd) for gmm prior +} +parameters { + real a; // population-level effects + vector[npredictors] b; +} + +model { + // initialize linear predictor term + vector[nobs] mu = a + X * b; + for (n in 1:nobs) { + // apply the inverse link function + mu[n] = shape * exp(-(mu[n])); + } + target += gamma_lpdf(Y | shape, mu); + + // priors including constants + + target += normal_lpdf(a | mm_mean, mm_bandwidth) - log(nsims); + target += normal_lpdf(b[1] | 0, 1); + target += normal_lpdf(b[2] | 0, 1); + target += normal_lpdf(b[3] | 0, 1); + target += normal_lpdf(b[4] | 0, 1); + target += normal_lpdf(b[5] | 0, 1); + target += normal_lpdf(b[6] | 0, 1); + target += normal_lpdf(b[7] | 0, 1); + target += normal_lpdf(b[8] | 0, 1); + target += normal_lpdf(b[9] | 0, 1); + target += normal_lpdf(b[10] | 0, 1); + target += normal_lpdf(b[11] | 0, 1); + target += normal_lpdf(b[12] | 0, 1); + target += normal_lpdf(b[13] | 0, 1); + target += normal_lpdf(b[14] | 0, 1); + target += normal_lpdf(b[15] | 0, 1); +} diff --git a/vignettes/models/two_normal.stan b/vignettes/models/two_normal.stan new file mode 100644 index 0000000..d1201cc --- /dev/null +++ b/vignettes/models/two_normal.stan @@ -0,0 +1,16 @@ +data { + int N; + vector[N] y; + real prior_width; +} + +parameters { + real loc; + real scale; +} + +model { + loc ~ normal(0, prior_width); + scale ~ lognormal(0, prior_width); + y ~ normal(loc, scale); +} \ No newline at end of file diff --git a/vignettes/models/two_normal_hp.stan b/vignettes/models/two_normal_hp.stan new file mode 100644 index 0000000..28f4f20 --- /dev/null +++ b/vignettes/models/two_normal_hp.stan @@ -0,0 +1,21 @@ +data { + // hyperparams + int N; + real loc_mu; + real loc_sd; + real scale_mu; + real scale_sd; + // outcome + vector[N] y; +} + +parameters { + real loc; + real scale; +} + +model { + loc ~ normal(loc_mu, loc_sd); + scale ~ lognormal(scale_mu, scale_sd); + y ~ normal(loc, scale); +} diff --git a/vignettes/other_models/changepoint.jags b/vignettes/other_models/changepoint.jags new file mode 100644 index 0000000..155dbb1 --- /dev/null +++ b/vignettes/other_models/changepoint.jags @@ -0,0 +1,14 @@ +data { + for(i in 1:T) { + prior_s[i] = 1.0/T + } +} + +model { + e ~ dexp(r_e); + l ~ dexp(r_l); + s ~ dcat(prior_s) + for(i in 1:T) { + y[i] ~ dpois(ifelse(i < s, e, l)) + } +} diff --git a/vignettes/other_models/changepoint_marginalized.jags b/vignettes/other_models/changepoint_marginalized.jags new file mode 100644 index 0000000..e165446 --- /dev/null +++ b/vignettes/other_models/changepoint_marginalized.jags @@ -0,0 +1,40 @@ +data { + for(i in 1:T) { + prior_unif[i] = 
-log(T) + } + + # Using the zeroes crossing trick to compute the likelihood + # See e.g. https://667-per-cm.net/2014/02/17/the-zero-crossings-trick-for-jags-finding-roots-stochastically/ + z = 0 +} + +model { + e ~ dexp(r_e); + l ~ dexp(r_l); + + # Prepare the zero trick + z ~ dpois(z_mean) + + # Compute the likelihood + # The lp is a matrix to avoid having to redefine nodes + lp[1, 1:T] = prior_unif + for (s in 1:T) { + for (t in 1:T) { + lp[1 + t, s] = lp[t, s] + log(ifelse(t < s, e, l)) * y[t] - ifelse(t < s, e, l) + } + p[s] = exp(lp[T + 1, s]) + } + + # log-sum-exp to compute the log likelihood in a numerically stable way + m = max(lp[T + 1, ]) + sum_exp_rest[1] = 0 + for(t in 1:T) { + sum_exp_rest[1 + t] = sum_exp_rest[t] + exp(lp[T + 1, s] - m) + } + lp_total = m + log(sum_exp_rest[T + 1]) + + # We have the likelihood now add it to z_mean for the zeros trick + z_mean = -lp_total + 10000 + + s ~ dcat(p) +} diff --git a/vignettes/rank_visualizations.Rmd b/vignettes/rank_visualizations.Rmd new file mode 100644 index 0000000..ea1817a --- /dev/null +++ b/vignettes/rank_visualizations.Rmd @@ -0,0 +1,245 @@ +--- +title: "SBC rank visualizations" +author: "Martin Modrák" +date: "`r Sys.Date()`" +output: rmarkdown::html_vignette +vignette: > + %\VignetteIndexEntry{SBC rank visualizations} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} +--- + +This vignette aims to explain how to interpret various visualizations of +the main results of SBC: the posterior ranks. If the model + algorithm works +correctly, the posterior ranks of the prior draws should be distributed uniformly. +While one could test +for this uniformity numerically via some sort of statistical test, it is often +more informative to look at visualizations that not only tell us whether a +problem appears, but what kind of problem and how severe it is. + +In this vignette, we'll first explain each of the visualizations individually + and then show them side-by-side for the same configuration. + + +```{r setup, message=FALSE,warning=FALSE, results="hide"} +library(bayesplot) +library(SBC) +library(ggplot2) +library(patchwork) +``` + +## Simulation setup + +We'll use the built-in example that simulates ranks from known prior and +data-averaged posterior distributions. + +```{r} +set.seed(22654885) +res_50 <- SBC_example_results("visualizations", n_sims = 50) +# The example results have this extra attribute showing analytic densities, +# this is not a part of normal SBC results +density_df <- attr(res_50, "density_df") +``` + +This is how the corresponding analytical densities look like - in the +"Exact match" case they overlap and SBC should pass, in all other cases it +should signal issues. + +```{r densities} +plot_density_comparison <- function(density_df) { + ggplot(density_df, aes(x = x, y = density, color = type, size = type)) + + geom_line(data = dplyr::filter(density_df, type == "Prior")) + + geom_line(data = dplyr::filter(density_df, type == "Data-averaged posterior"), alpha = 0.8) + + facet_wrap(~variable) + + scale_color_manual("", values = c("Prior" = "skyblue1", + "Data-averaged posterior" = "black")) + + scale_size_manual("", values = c("Prior" = 2, + "Data-averaged posterior" = 1)) + + theme(legend.position = "bottom") +} + +plot_density_comparison(density_df) + +``` + +Now let's look how these issues manifest in various plots: + +## `plot_rank_hist` - The rank histogram + + +Rank histogram is probably the simplest of the visualizations. 
We plot a histogram +of the ranks and look if all bins are roughly equally represented. +The expected average count is shown as a horizontal black line, and an approximate +interval highlighting expected deviations (by default 95%) is shown as the light blue wedged rectangle in the background. + +```{r rank_hist_50_default} +plot_rank_hist(res_50) +``` + + +The two main disadvantages of this plot are: + +1. Since the confidence interval is only approximate, it cannot be taken too seriously. However, gross violations are still immediately visible. +2. It is sensitive to the choice of number of bins. There is a tradeoff in that more bins mean more resolution, but less power to detect some violations of uniformity. Additionally, one needs to be mindful of the total number of ranks as the number of bins should preferably divide the number of ranks. + +This is the reason why the "Some extra-low estimates" case is not visible with the default number of bins - the extra low estimates (and thus high ranks) get smoothed by being combined into a wider bin. We can plot the same results but with the maximum number of bins (100, as the ranks in our case range from 0 to 99): + +```{r rank_hist_50_100} +plot_rank_hist(res_50, bins = 100) +``` + +This lets us see something suspicious for the "Some extra-low estimates" case, but we've increased +noise overall and the other patterns become harder to see. + +Additionally, if the number of bins does not divide the total number of ranks (here 100) neatly, +some bins are expected to get slightly more ranks than others. The plot compensates for this by extending the confidence interval to cover both cases, resulting in some loss of precision - here a particularly bad choice of the number of bins obscures problems in "Model too uncertain" and "Some extra-low estimates". + +```{r rank_hist_50_17} +plot_rank_hist(res_50, bins = 17) +``` + +Choice of number of bins obviously becomes less of a problem, if we have a large number of simulations. +With 1000 simulations, the patterns are clear and unmistakable. + +```{r rank_hist_1000} +res_1000 <- SBC_example_results("visualizations", n_sims = 1000) +plot_rank_hist(res_1000) +``` + +We should also note that since we +are essentially performing many comparisons, seeing a low number of "failures" in some bins is to be expected and does not necessarily signal a problem - in the plot above the "Exact match" case has several bins outside of the approximate confidence interval. + + +## `plot_ecdf` and `plot_ecdf_diff` - ECDF plots + +These two related plots remedy the main problems of the rank histogram - they do not depend on any binning and provide exact confidence intervals. The ECDF plot shows the empirical cumulative distribution function (ECDF). If ranks were perfectly uniform, this would be a "diagonal staircase", but some deviations from exact uniformity are to be expected. The ECDF plot shows aN ellipse outlining the expected deviations (by default at the 95% level). It looks like this: + +```{r ecdf_50} +plot_ecdf(res_50) +``` + +A minor problem with this visualization is that the top-left and bottom-right parts of the plot are usually left unused and as the number of simulations grows, it may become hard to discern details in the center. Let us look at the same plot from 1000 simulations: + +```{r ecdf_1000} +plot_ecdf(res_1000) +``` + +Now it gets a bit hard to see, whether the "Exact match" case is well within the ellipse or rather hitting the boundaries. 
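One way to regain resolution is to look directly at how far the ECDF deviates from the uniform CDF. A rough base-R sketch of that quantity follows; it is purely illustrative (the package's plot additionally draws simultaneous confidence bands), and the synthetic ranks are just placeholders.

```r
# Sketch only: the ECDF of scaled ranks minus the uniform CDF
set.seed(1)
max_rank <- 99
ranks <- sample(0:max_rank, 1000, replace = TRUE)  # stand-in for SBC ranks
u <- (ranks + 1) / (max_rank + 1)                  # ranks scaled to (0, 1]
z <- seq(0, 1, length.out = 200)
plot(z, ecdf(u)(z) - z, type = "l", ylab = "ECDF - uniform CDF")
```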
+
+The ECDF diff plot shows exactly the same information as the ECDF plot, but instead of showing the ECDF itself,
+it shows the _difference_ between the ECDF and the perfectly uniform CDF. In other words, it rotates the ECDF plot by 45 degrees to the right to make the uniform CDF a flat line:
+
+```{r ecdf_diff_1000}
+plot_ecdf_diff(res_1000)
+```
+
+Now we get much higher resolution for the "Exact match" case. Also note that
+in the rank histogram the "Some extra-low estimates" case showed up only as a failure in the highest ranks. However, the ECDF and ECDF diff plots also show how the slight under-abundance of low ranks - which is not noticeable when looking at each rank / rank bin individually - slowly adds up, and by the 50th percentile we already see a problem.
+
+The ECDF diff plot usually looks better than the ECDF plot even with a lower number of simulations and is thus preferable to the ECDF plot in most cases:
+
+```{r ecdf_diff_50}
+plot_ecdf_diff(res_50)
+```
+
+A downside of the ECDF and especially the ECDF diff plot is that the connection between the shape seen in the plot and the type of the failure is less straightforward.
+
+## `plot_coverage` and `plot_coverage_diff` - Empirical coverage
+
+
+The rank histogram and both ECDF plots are useful for noticing that there is a problem in the model and what type of mismatch we are seeing. However, it is
+a bit harder to interpret how bad the failures actually are for inference and how
+large a problem could still go unnoticed because we ran too few simulations.
+
+The empirical coverage plot tries to help with that. It builds on the `empirical_coverage()` function and by default shows the coverage of the central posterior credible intervals (coverage is the proportion of true variable values that fall within the interval). A well-working model would have coverage exactly matching the interval width (i.e. a 95% credible interval contains the true value 95% of the time), as shown by the blue line.
+The focus on central intervals is often more relevant to inference than the
+leftmost intervals implied in the ECDF plots. The coverage is accompanied by approximate credible intervals for the coverage (gray).
+
+
+```{r coverage_50}
+plot_coverage(res_50)
+```
+
+This lets us neatly see that with 50 simulations, we still cannot rule out even relatively large miscalibration in the "Exact match" case, where e.g. the 50% central interval could still contain about 70% of the true values. A downside of the focus on central intervals is that underestimation and overestimation now produce the same overall shape in the plot.
+
+
+For similar reasons as with the ECDF plot, there is also a "difference" version of the plot that focuses directly on the _difference_ between observed and expected coverage.
+
+```{r coverage_diff_50}
+plot_coverage_diff(res_50)
+```
+
+In the example here, all the problematic scenarios also manifest as problems on the empirical coverage plot. However, empirical coverage, especially for central intervals, has some notable limitations as a diagnostic and should therefore always be complemented by a rank histogram / ECDF plot - see `help(empirical_coverage)` for some additional details.
+
+
+
+
+## Side by side comparison
+
+To let us better understand how the various plots relate, we will now plot
+the scenarios one by one, showing all plots for the same scenario side-by-side.
+ +```{r} +plot_side_by_side <- function(res, var) { + legend_bottom <- theme(legend.position = "bottom", + legend.direction = "vertical", + legend.margin = margin(t=-1, unit = "cm") + ) + # Hack - use variable name to show plot type + density_df_to_plot <- dplyr::filter(density_df, variable == var) + density_df_to_plot$variable <- "Densities" + + stats <- dplyr::filter(res$stats, variable == var) + + p_dens <- plot_density_comparison(density_df_to_plot) + + legend_bottom + p_rank <- plot_rank_hist(dplyr::mutate(stats, variable = "Rank histogram")) + p_ecdf <- plot_ecdf(dplyr::mutate(stats, variable = "ECDF")) + legend_bottom + p_ecdf_diff <- plot_ecdf_diff(dplyr::mutate(stats, variable = "ECDF diff")) + legend_bottom + p_coverage <- plot_coverage(dplyr::mutate(stats, variable = "Coverage")) + p_coverage_diff <- plot_coverage_diff(dplyr::mutate(stats, variable = "Coverage diff")) + p_dens + p_ecdf + p_ecdf_diff + p_rank + p_coverage + p_coverage_diff + + plot_annotation(var) +} +``` + +We will start with the "Exact match" (i.e. no problem) scenario with 50 simulations. + +```{r side_by_side_50_exact_match} +plot_side_by_side(res_50, "Exact match") +``` + +The relative utility of the `_diff` versions of the plots changes if we have more simulations: + + +```{r side_by_side_1000_exact_match} +plot_side_by_side(res_1000, "Exact match") +``` + +If the model is too certain, it will have over-abundance of extreme ranks and lower +than expected coverage. + +```{r side_by_side_50_too_certain} +plot_side_by_side(res_50, "Model too certain") +``` + +If the model is overly uncertain, it will have overabundance of central ranks (and too few extreme ranks) and the coverage will be higher than expected. + +```{r side_by_side_50_too_uncertain} +plot_side_by_side(res_50, "Model too uncertain") +``` + +If the model is underestimating, we will see too many high ranks and coverage will be lower than expected. + +```{r side_by_side_50_underest} +plot_side_by_side(res_50, "Model underestimating") +``` + +If the model is overestimating we will see too many low ranks, while the effect on central interval coverage will be similar to underestimation and the coverage will be lower than expected. + + +```{r side_by_side_50_overest} +plot_side_by_side(res_50, "Model overestimating") +``` + + diff --git a/vignettes/rejection_sampling.Rmd b/vignettes/rejection_sampling.Rmd index 492f534..2df315b 100644 --- a/vignettes/rejection_sampling.Rmd +++ b/vignettes/rejection_sampling.Rmd @@ -1,20 +1,20 @@ --- -title: Rejection sampling in dataset generation +title: Rejection sampling in simulations author: "Martin Modrák" date: "`r Sys.Date()`" output: rmarkdown::html_vignette: toc: yes vignette: > - %\VignetteIndexEntry{Rejection sampling in dataset generation} + %\VignetteIndexEntry{Rejection sampling in simulations} %\VignetteEngine{knitr::rmarkdown} \usepackage[utf8]{inputenc} --- -In some cases, one may want to exclude extreme datasets from SBC (e.g. because -those datasets create divergences). It is best to use +In some cases, one may want to exclude extreme simulations from SBC (e.g. because +those simulations create divergences). It is best to use prior predictive checks to examine your priors and change them -to avoid the extreme datasets. In some cases, this may however be impractical/impossible to +to avoid extremes in the simulated data. 
In some cases, this may however be impractical/impossible to do via prior choice - one example are regression coefficients, where once we have many predictors, any independent prior that is not very strict will lead to unrealistic predictions. Joint priors are needed in such case, but @@ -23,9 +23,9 @@ those are not well understood and easy to use. See for more context. An alternative is to use _rejection sampling_ i.e. we repeatedly generate a -dataset and only accept it as a dataset when it passes a certain condition we impose +simulation and only accept it when it passes a certain condition we impose (e.g. that no observed count is larger than $10^8$). -But does rejection sampling when generating datasets affect the validity of SBC? +But does rejection sampling when generating simulations affect the validity of SBC? Thanks to forum user Niko Huurre who derived the necessary math at [Stan Discourse discussion of the topic](https://discourse.mc-stan.org/t/using-narrower-priors-for-sbc/21709/6?u=martinmodrak) @@ -33,7 +33,7 @@ we know exactly when it is OK. Briefly: for algorithms that only need to know the posterior density up to a constant (which includes Stan and many others), it is OK as long as the rejection criterion only -uses observed data and not the unobserved parameters. +uses observed data and not the unobserved variables. We'll first walk through the math and then show examples of both OK and problematic rejection sampling. @@ -41,7 +41,7 @@ rejection sampling. ## The math -Let $f\left(y\right)$ be the probability that the simulated dataset $y$ is rejected (usually a 0-1 function if you have a clear idea what a "bad" dataset looks like, but could be probabilistic if you're relying on finicky diagnostics). The important numbers are the probability of rejection for parameter $\theta$ +Let $f\left(y\right)$ be the probability that the simulated data $y$ is rejected (usually a 0-1 function if you have a clear idea what a "bad" dataset looks like, but could be probabilistic if you're relying on finicky diagnostics). The important numbers are the probability of rejection for variable $\theta$ $$ L\left(\theta\right)=\int f\left(y\right)\pi\left(y|\theta\right)\mathrm{d}y @@ -53,7 +53,7 @@ $$ R=\iint f\left(y\right)\pi\left(y|\theta\right)\pi\left(\theta\right)\mathrm{d}y\mathrm{d}\theta=\int L\left(\theta\right)\pi\left(\theta\right)\mathrm{d}\theta $$ -Rejecting the parameter draw when it generates a “bad” dataset effectively distorts the prior +Rejecting the variable draw when it generates “bad” data effectively distorts the prior $$ \pi\left(\theta\right)\to\frac{L\left(\theta\right)}{R}\pi\left(\theta\right) @@ -77,7 +77,7 @@ $$ \pi(\theta | y) \propto \frac{L(\theta)}{R} \pi(y | \theta) \frac{f(y)}{L(\theta)} \pi(\theta) = \frac{f(y)}{R} \pi(y | \theta) \pi(\theta) $$ -And since $\frac{f(y)}{R}$ is a constant for any given dataset (and hence the fit), +And since $\frac{f(y)}{R}$ is a constant for any given simulation (and hence the fit), the overall posterior for Stan (and most other MCMC algorithms) is the same, because Stan only needs the posterior density up to a constant. So whether we take rejection into account or not, the model will match the generating process. @@ -91,12 +91,13 @@ So let's see if that also happens in practice. 
Let's setup our environment: ```{r setup, message=FALSE,warning=FALSE, results="hide"} library(SBC) -use_cmdstanr <- TRUE # Set to false to use rstan instead +use_cmdstanr <- getOption("SBC.vignettes_cmdstanr", TRUE) # Set to false to use rstan instead if(use_cmdstanr) { library(cmdstanr) } else { library(rstan) + rstan_options(auto_write = TRUE) } library(bayesplot) @@ -108,7 +109,11 @@ plan(multisession) options(SBC.min_chunk_size = 10) # Setup caching of results -cache_dir <- "./rejection_sampling_SBC_cache" +if(use_cmdstanr) { + cache_dir <- "./_rejection_sampling_SBC_cache" +} else { + cache_dir <- "./_rejection_sampling_rstan_SBC_cache" +} if(!dir.exists(cache_dir)) { dir.create(cache_dir) } @@ -140,7 +145,7 @@ N <- 10 generator <- SBC_generator_function(function() { mu <- rnorm(1, 0, 2) list( - parameters = list(mu = mu), + variables = list(mu = mu), generated = list(N = N, y = rnorm(N, mu, 1)) ) }) @@ -154,7 +159,7 @@ datasets <- generate_datasets(generator, 1000) ``` ```{r} -results <- compute_results(datasets, backend, keep_fits = FALSE, +results <- compute_SBC(datasets, backend, keep_fits = FALSE, cache_mode = "results", cache_location = file.path(cache_dir, "no_rejections")) ``` @@ -166,12 +171,12 @@ plot_rank_hist(results) Indeed, all looks good. -### Rejection based on parameter values +### Rejection based on unobserved variables -Now let us modify the generator to reject based on parameter values. +Now let us modify the generator to reject based on values of an unobserved variable. ```{r} -generator_reject_param <- SBC_generator_function(function() { +generator_reject_unobserved <- SBC_generator_function(function() { repeat { mu <- rnorm(1, 0, 2) if(mu > 3) { @@ -179,7 +184,7 @@ generator_reject_param <- SBC_generator_function(function() { } } list( - parameters = list(mu = mu), + variables = list(mu = mu), generated = list(N = N, y = rnorm(N, mu, 1)) ) }) @@ -189,18 +194,18 @@ We don't even need to run very many fits to see the problem. ```{r} set.seed(21455) -datasets_reject_param <- generate_datasets(generator_reject_param, 200) +datasets_reject_unobserved <- generate_datasets(generator_reject_unobserved, 200) ``` ```{r} -results_reject_param <- compute_results(datasets_reject_param, backend, keep_fits = FALSE, +results_reject_unobserved <- compute_SBC(datasets_reject_unobserved, backend, keep_fits = FALSE, cache_mode = "results", - cache_location = file.path(cache_dir, "reject_param")) + cache_location = file.path(cache_dir, "reject_unobserved")) ``` -```{r reject_param_plots} -plot_ecdf_diff(results_reject_param) -plot_rank_hist(results_reject_param) +```{r reject_unobserved_plots} +plot_ecdf_diff(results_reject_unobserved) +plot_rank_hist(results_reject_unobserved) ``` Indeed, we see a clear failure. @@ -209,7 +214,7 @@ Indeed, we see a clear failure. But what if we reject based on the values of data? This should in theory result in just a constant change in posterior density and not affect SBC. (SBC will however then check only the -non-rejected parts of the data space). We will do a relatively aggressive rejection scheme (reject more than 50% of datasets). +non-rejected parts of the data space). We will do a relatively aggressive rejection scheme (reject more than 50% of simulations). 
```{r} generator_reject_y <- SBC_generator_function(function() { @@ -221,7 +226,7 @@ generator_reject_y <- SBC_generator_function(function() { } } list( - parameters = list(mu = mu), + variables = list(mu = mu), generated = list(N = N, y = y) ) }) @@ -233,7 +238,7 @@ datasets_reject_y <- generate_datasets(generator_reject_y, 1000) ``` ```{r} -results_reject_y <- compute_results(datasets_reject_y, backend, keep_fits = FALSE, +results_reject_y <- compute_SBC(datasets_reject_y, backend, keep_fits = FALSE, cache_mode = "results", cache_location = file.path(cache_dir, "reject_y")) ``` @@ -247,9 +252,9 @@ We see that even with quite heavy rejection based on y, SBC to a high resolution ## Take home message -If our priors can sometimes generate datasets that are unrealistic, but we are unable to +If our priors can sometimes result in simulated data that is unrealistic, but we are unable to specify a better prior directly (e.g. because we would need to define some sort of joint prior), -we can use rejection sampling to prune unrealistic datasets as long as we only filter by the observed -data and don't directly use any unobserved parameter values. Notably, filtering based on divergences or other +we can use rejection sampling to prune unrealistic simulations as long as we only filter by the observed +data and don't directly use any unobserved variable values. Notably, filtering based on divergences or other fitting issues is also just a function of data and thus permissible. The resulting SBC will however provide guarantees -only for datasets that would not be rejected by the same criteria. +only for data that would not be rejected by the same criteria. diff --git a/vignettes/rmarkdown_images/modrak_sbc_results.png b/vignettes/rmarkdown_images/modrak_sbc_results.png new file mode 100644 index 0000000..aa840b7 Binary files /dev/null and b/vignettes/rmarkdown_images/modrak_sbc_results.png differ diff --git a/vignettes/self-calibration-adaptive-binom.Rmd b/vignettes/self-calibration-adaptive-binom.Rmd new file mode 100644 index 0000000..28c87dc --- /dev/null +++ b/vignettes/self-calibration-adaptive-binom.Rmd @@ -0,0 +1,155 @@ +--- +title: "self-calibration-adaptive" +author: "Hyunji Moon, Shinyoung Kim" +output: + html_document: default + pdf_document: default +--- +```{r setup, include=FALSE, warning=FALSE} +library(SBC) +library(cmdstanr) +library(parallel) +library(bayesplot) +library(posterior) +library(dplyr) +library(rstan) +library(future) +library(ggpubr) +library(rstanarm) +library(ggplot2) +library(mclust) +options(mc.cores = parallel::detectCores()) +plan(multisession) +options(SBC.min_chunk_size = 5) +#set.seed(1984) +``` + +We introduce a self-calibration algorithm. First, dap operator: $P \rightarrow P$ is $\hat{g}(f(\Phi X, U), \Phi X)$. Limiting to normal is inspected to verify its sublinearity and monotonicity. Then, based on this hyperparmeter self-calibration algorithm is suggested. Final improvement of calibration is compared with SBC rank plot. + +We finally apply the self-calibration algorithm to the centered eightschools model, starting from an unstable region and attempting to find a self-consistent region. We found that for models that impose constraints, reparameterizing the model to include an unconstrained parameter, run self calibration in the unconstrained space, and the applying constraints manually showed the best results. 
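+
+To make the iteration concrete before diving into the binomial example, here is a bare-bones sketch of a moment-matching self-calibration loop in plain R. This is only an illustration of the idea described above: the actual `self_calib_adaptive()` implementation differs in its update rules, loss tracking and stopping criteria, and the helper `self_calib_sketch` below is a hypothetical simplification.
+
+```{r, eval=FALSE}
+# Illustrative sketch only (not the package implementation): repeatedly move the
+# simulation prior towards the data-averaged posterior (DAP) by moment matching.
+self_calib_sketch <- function(calib_generator, backend, variable, lambda,
+                              niter = 20, tol = 0.1, fixed_args = NULL) {
+  for (i in seq_len(niter)) {
+    datasets <- calib_generator(lambda, fixed_args)
+    res <- compute_results(datasets, backend, thin_ranks = 1)
+    # pool posterior draws across all fits -> data-averaged posterior
+    dap_draws <- unlist(lapply(res$fits, function(fit)
+      posterior::extract_variable(SBC_fit_to_draws_matrix(fit), variable)))
+    dap <- list(mu = mean(dap_draws), sigma = sd(dap_draws))
+    # stop once the simulation prior and the DAP (roughly) agree
+    change <- max(abs(dap$mu - lambda[[variable]]$mu),
+                  abs(dap$sigma - lambda[[variable]]$sigma))
+    lambda[[variable]] <- dap
+    if (change < tol) break
+  }
+  lambda
+}
+```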
+ +```{R, warning=FALSE, error=FALSE} +## Generator settings +# number of SBC simulations per iteration (generator) +nsims <- 200 + +# number of observations +nobs <- 10#2 + +# link function (1 = logit, 2 = probit, 3 = cloglog) +link <- 1 + +# number of binomial trials per observation +nsize <- 10 + +## Backend settings +# number of draws per posterior approximation +ndraws <- 100 + +# number of chains for hmc posterior approximation +nchains <- 2 +``` + +# Inspecting DAP of binom-laplace +```{R, warning=FALSE, error=FALSE} +generator_binom <- function(lambdas, fixed_args){ + # fixed value across simulated datasets + # experiment settings + nobs <- fixed_args$nobs + nsize <- fixed_args$nsize + dist_types <- fixed_args$dist_types + # modular settings + link_type <- fixed_args$link_type + + # generate + lambda_arg1 <- c() + lambda_arg2 <- c() + if(dist_types$eta == "normal"){ + eta <- rnorm(1, mean = lambdas$eta$mu, sd=lambdas$eta$sigma) + lambda_arg1 <- c(lambda_arg1, lambdas$eta$mu) + lambda_arg2 <- c(lambda_arg2, lambdas$eta$sigma) + } + else if(dist_types$eta == "gamma"){ + eta <- rgamma(1, shape = lambdas$eta$alpha, rate = lambdas$eta$beta) + lambda_arg1 <- c(lambda_arg1, lambdas$eta$alpha) + lambda_arg2 <- c(lambda_arg2, lambdas$eta$beta) + } + + + + mu <- invtf_param_vec(eta, link_type = link_type) + Y <- rbinom(nobs, size = nsize, prob = mu) + list( + parameters = list(eta = eta), + generated = list(nobs= nobs, nsize = nsize, link = link_type, + dist_types = match(unlist(dist_types), c("normal", "gamma")), lambda_arg1 = lambda_arg1, lambda_arg2 = lambda_arg2, + Y = Y) + ) +} + +fixed_args_binom <- list(nobs = nobs, nsize = nsize, link_type = 1, nsims = nsims, ndraws = ndraws, dist_types=list(eta="normal")) + +``` + +# Self-calibration of binomial-laplace +Initially start from badly calibrated region: +```{R, warning=FALSE, error=FALSE} +# initial prior hyperparameters +lambda_init_binom <- list( + eta = list(mu=100, sigma=100) +) +datasets_binom <- generate_datasets(SBC_generator_function(generator_binom, lambda_init_binom, fixed_args_binom), n_datasets = fixed_args_binom$nsims) + +# hyperparameter update algorithm +updator = "mc_update" + +# maximal number of SBC iterations +niter <- 100 + +# tolerance +tol <- 0.1 + +# learning rate +gamma <- 1.5 # 0.5 for gradient update, 10 for normal_str_update + +# step2: inferring posterior +rstan_binom_mod <- stan_model("models/binom-laplace.stan") +cmdstan_binom_mod <- cmdstanr::cmdstan_model("models/binom-laplace.stan") + +backend_binom_opt <- SBC_backend_rstan_optimizing(rstan_binom_mod, draws = ndraws) +#backend_binom_hmc <- SBC_backend_cmdstan_sample(cmdstan_binom_mod, chains = 4, iter_sampling = ndraws / 4) # thin = 10 + +# initial badly calibrated +result_binom_opt <- compute_results(datasets_binom, backend_binom_opt, thin_ranks = 1) +plot_rank_hist(result_binom_opt) +# result_binom_hmc <- compute_results(datasets_binom, backend_binom_hmc) + +``` + +# Run self-calibration for binomial-laplace approximation model + +```{R, warning=FALSE, error=FALSE} +knitr::opts_chunk$set(error = TRUE) +# wrapper function to follow `self_calib_adaptive` interface +calib_generator <- function(lambdas, fixed_args){ + generate_datasets(SBC_generator_function(generator_binom, lambdas, fixed_args), n_datasets = fixed_args$nsims) +} +sc_binom_opt <- self_calib_adaptive(calib_generator, backend_binom_opt, updator, c("eta"), lambda_init_binom, nsims, gamma, tol, fixed_args = fixed_args_binom) +``` +### Iteration details: +```{R, warning=FALSE, error=FALSE} 
+sc_binom_opt$t_df +``` + +```{R} +ggplot(sc_binom_opt$t_df) + geom_point(aes(x=iter, y=eta_lambda_loss)) +``` + +### Calculate rank from calibrated lambdas + +```{R, warning=FALSE, error=FALSE} +datasets_binom_new <- generate_datasets(SBC_generator_function(generator_binom, sc_binom_opt$lambda, fixed_args_binom), n_datasets = fixed_args_binom$nsims) +result_binom_opt_new <- compute_results(datasets_binom_new, backend_binom_opt, thin_ranks = 1) + +plot_rank_hist(result_binom_opt_new) +``` diff --git a/vignettes/self-calibration-adaptive-eightschools.Rmd b/vignettes/self-calibration-adaptive-eightschools.Rmd new file mode 100644 index 0000000..6edad45 --- /dev/null +++ b/vignettes/self-calibration-adaptive-eightschools.Rmd @@ -0,0 +1,164 @@ +--- +title: "self-calibration-adaptive" +author: "Hyunji Moon, Shinyoung Kim" +output: + html_document: default + pdf_document: default +--- +```{r setup, include=FALSE, warning=FALSE} +library(SBC) +library(cmdstanr) +library(parallel) +library(bayesplot) +library(posterior) +library(dplyr) +library(rstan) +library(future) +library(ggpubr) +library(rstanarm) +library(ggplot2) +library(mclust) +options(mc.cores = parallel::detectCores()) +plan(multisession) +options(SBC.min_chunk_size = 5) +#set.seed(1984) +``` + +```{R, warning=FALSE, error=FALSE} +## Generator settings +# number of SBC simulations per iteration (generator) +nsims <- 200 + +# number of observations +nobs <- 10#2 + +# link function (1 = logit, 2 = probit, 3 = cloglog) +link <- 1 + +# number of binomial trials per observation +nsize <- 10 + +## Backend settings +# number of draws per posterior approximation +ndraws <- 100 + +# number of chains for hmc posterior approximation +nchains <- 2 +``` + +```{R, warning=FALSE, error=FALSE} +## Generator settings +# number of SBC simulations per iteration (generator) +nsims <- 200 + +# number of observations +nobs <- 10#2 + +# link function (1 = logit, 2 = probit, 3 = cloglog) +link <- 1 + +# number of binomial trials per observation +nsize <- 10 + +## Backend settings +# number of draws per posterior approximation +ndraws <- 100 + +# number of chains for hmc posterior approximation +nchains <- 2 +``` + +# Inspecting rank plots of eightschools +Since tau requires a positive constraint, we will be working with log(tau). 
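+
+To see why working on the log scale is convenient, note that any unconstrained draw maps to a valid (positive) tau - a tiny standalone illustration (not part of the generator below):
+
+```{r, eval=FALSE}
+log_tau <- rnorm(5, mean = 0, sd = 1)  # unconstrained draws from the simulation prior
+tau <- exp(log_tau)                    # always strictly positive, so the constraint holds
+```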
+```{R, warning=FALSE, error=FALSE} +generator_eightschools_cp <- function(lambda_mu, lambda_sigma, fixed_args){ + # fixed value across simulated datasets + nsims <- fixed_args$nsims + J <- fixed_args$J + sigma <- fixed_args$sigma + + # Draw tau from the designated normal distribuiton + a_bar <- -lambda_mu/lambda_sigma + # tau <- NaN + # while(TRUE){ + # u <- runif(1)$ + # x_bar <- sqrt(a_bar^2 - 2 * log(1 - u)) + # nu <- runif(1) + # tau <- lambda_sigma * x_bar + lambda_mu + # if(nu <= x_bar/a_bar) break + # } + log_tau <- rnorm(1, lambda_mu, lambda_sigma) + tau <- exp(log_tau) + + + + # other parameters are drawn from the default prior + mu = rnorm(1, 0, 5) + theta <- rnorm(1, mu, tau) + # draw y from simulated parameters + y <- rnorm(J, mu, sigma) + + list( + parameters = list(log_tau = log_tau), + generated = list( + J = J, + y = y, + sigma = sigma, + nsims = nsims, + lambda_mu = lambda_mu, + lambda_log_sigma = log(lambda_sigma) + ) + ) +} + +cmdstan_mod_eightschools <- cmdstanr::cmdstan_model("models/eightschools_cp_posteriordb.stan") +backend_eightschools_hmc <- SBC_backend_cmdstan_sample(cmdstan_mod_eightschools, chains = 4, iter_sampling = ndraws / 4) # thin = 10 +fixed_args_eightschools <- list(J = 8, nsims = nsims, sigma = c(15, 10, 16, 11, 9, 11, 10, 18), nsims=nsims) +datasets <- generate_datasets(SBC_generator_function(generator_eightschools_cp, 0, 1, fixed_args_eightschools), n_datasets = fixed_args_eightschools$nsims) +sbc_result <- compute_results(datasets, backend_eightschools_hmc, thin_ranks = 4) +plot_rank_hist(sbc_result) +``` + +# Run self calibration for centered eight schools model + +We now repeat the above procedure with eight-school's log(tau). We start at N(10, 10), which upon inspection of the DAP distribution, results in a very narrow distribution near 0. Rank histogram plots also imply extreme underdispersion. But after 6 self-calibration iterations, we can observe the extreme rank plots being tamed, with the DAP distribution and the lambda hyperdistribution being similar. 
+```{R, warning=FALSE, error=FALSE} +# hyperparameter update algorithm +updator = "normal_str_update" + +# maximal number of SBC iterations +niter <- 100 + +# tolerance +tol <- 0.1 + +# learning rate +gamma <- 1.5 # 0.5 for gradient update, 10 for normal_str_update + +lambda_mu = 10 +lambda_sigma = 10 +cmdstan_mod_eightschools <- cmdstanr::cmdstan_model("models/eightschools_cp_posteriordb.stan") +backend_eightschools_hmc <- SBC_backend_cmdstan_sample(cmdstan_mod_eightschools, chains = 4, iter_sampling = ndraws / 4) # thin = 10 +fixed_args_eightschools <- list(J = 8, nsims = nsims, sigma = c(15, 10, 16, 11, 9, 11, 10, 18), nsims=nsims) +datasets <- generate_datasets(SBC_generator_function(generator_eightschools_cp, 0, 1, fixed_args_eightschools), n_datasets = fixed_args_eightschools$nsims) +result_eightschools_hmc <- compute_results(datasets, backend_eightschools_hmc, thin_ranks = 4) +# initial badly calibrated +plot_rank_hist(result_eightschools_hmc) +calib_generator <- function(lambda_mu, lambda_sigma, fixed_args){ + generate_datasets(SBC_generator_function(generator_eightschools_cp, lambda_mu, lambda_sigma, fixed_args), n_datasets = fixed_args$nsims) +} + +sc_hmc_eightschools <- self_calib_adaptive(calib_generator, backend_eightschools_hmc, updator, "log_tau", lambda_mu, lambda_sigma, nsims, gamma, tol, fixed_args = fixed_args_eightschools) + +datasets_eightschools_new <- generate_datasets(SBC_generator_function(generator_eightschools_cp, sc_hmc_eightschools$mu, sc_hmc_eightschools$sigma, fixed_args_eightschools), n_datasets = fixed_args_eightschools$nsims) +result_eightschools_hmc_new <- compute_results(datasets_eightschools_new, backend_eightschools_hmc) +plot_rank_hist(result_eightschools_hmc_new) +ggplot(sc_hmc_eightschools$t_df) + geom_point(aes(x=iter, y=lambda_loss)) +# plot calibration result +``` + +### Iteration details: + +```{R, warning=FALSE, error=FALSE} +sc_hmc_eightschools$t_df +``` diff --git a/vignettes/self-calibration-adaptive-gamma.Rmd b/vignettes/self-calibration-adaptive-gamma.Rmd new file mode 100644 index 0000000..8754869 --- /dev/null +++ b/vignettes/self-calibration-adaptive-gamma.Rmd @@ -0,0 +1,210 @@ +--- +title: "self-calibration-adaptive for Gamma regression" +author: "Hyunji Moon, Shinyoung Kim" +output: + html_document: default + pdf_document: default +--- +```{r setup, include=FALSE, warning=FALSE} +library(SBC) +library(cmdstanr) +library(parallel) +library(bayesplot) +library(posterior) +library(dplyr) +library(rstan) +library(future) +library(ggpubr) +library(rstanarm) +library(ggplot2) +library(mclust) +options(mc.cores = parallel::detectCores()) +#set.seed(1984) +``` + +```{R, warning=FALSE, error=FALSE} +## Generator settings +# number of SBC simulations per iteration (generator) +nsims <- 100 + +# number of observations +nobs <- 100 + +# link function (1 = logit, 2 = probit, 3 = cloglog) +link <- 1 + +# number of binomial trials per observation +nsize <- 10 + +## Backend settings +# number of draws per posterior approximation +ndraws <- 100 + +# number of chains for hmc posterior approximation +nchains <- 2 +``` + +# Inspecting DAP of gamma-regression-hmc + +We will calibrate the `shape` parameter, by changing the prior distribution for `shape`, as well as `a` + +```{R, warning=FALSE, error=FALSE} +generator_gr <- function(lambdas, fixed_args){ + # fixed value across simulated datasets + ## meta + nobs <- fixed_args$nobs + K <- fixed_args$K + dist_types <- fixed_args$dist_types + while(TRUE){ + # predictor + X <- array(rnorm(nobs * K, mean = 1, sd 
= 0.5), dim = c(nobs, K)) + b <- rnorm(K, mean = 0, sd = 1) + # generate + lambda_arg1 <- c() + lambda_arg2 <- c() + if(dist_types$shape == "normal"){ + shape <- rnorm(1, mean = lambdas$shape$mu, sd=lambdas$shape$sigma) + lambda_arg1 <- c(lambda_arg1, lambdas$shape$mu) + lambda_arg2 <- c(lambda_arg2, lambdas$shape$sigma) + } + else if(dist_types$shape == "gamma"){ + shape <- rgamma(1, shape = lambdas$shape$alpha, rate = lambdas$shape$beta) + lambda_arg1 <- c(lambda_arg1, lambdas$shape$alpha) + lambda_arg2 <- c(lambda_arg2, lambdas$shape$beta) + } + + if(dist_types$a == "normal"){ + a <- rnorm(1, mean = lambdas$a$mu, sd=lambdas$a$sigma) + lambda_arg1 <- c(lambda_arg1, lambdas$a$mu) + lambda_arg2 <- c(lambda_arg2, lambdas$a$sigma) + } + else if(dist_types$a == "gamma"){ + a <- rgamma(1, shape = lambdas$a$alpha, rate = lambdas$a$beta) + lambda_arg1 <- c(lambda_arg1, lambdas$a$alpha) + lambda_arg2 <- c(lambda_arg2, lambdas$a$beta) + } + + #a <- rnorm(1, mean = 2, sd = 5) + logmu <- as.numeric(a + X %*% b) + mu <- exp(logmu) + Y <- rgamma(nobs, shape = shape, rate = shape / mu) + if(!any(Y <= 1e-32)){ + return(list( + parameters = list(shape = shape), + generated = list(nobs= nobs, K = K, X = X, dist_types = match(unlist(dist_types), c("normal", "gamma")), + lambda_arg1 = lambda_arg1, lambda_arg2 = lambda_arg2, Y = Y) + ) + ) + } + } +} +fixed_args_gr <- list(nobs = nobs, K = 15, nsims = nsims, dist_types=list(shape="gamma", a="normal")) +cmdstan_mod_gr <- cmdstanr::cmdstan_model("models/gamma-reg.stan") +rstan_mod_gr <- stan_model("models/gamma-reg.stan") +backend_gr_opt <- SBC_backend_rstan_optimizing(rstan_mod_gr, draws = ndraws) +backend_gr_hmc <- SBC_backend_cmdstan_sample(cmdstan_mod_gr, chains = nchains, iter_sampling = ndraws / nchains) # thin = 10 +``` + +We start at an initially badly calibrated region: + +```{R, warning=FALSE, error=FALSE} +updator = "mc_update" + +# maximal number of SBC iterations +niter <- 100 + +# tolerance +tol <- 0.1 + +# learning rate +gamma <- 1.5 # 0.5 for gradient update, 10 for normal_str_update +nobs <- 10 +# prior hyperparameters +lambda_init_gamma <- list( + shape = list(alpha=2, beta=1), + a = list(mu=2, sigma=1) +) +fixed_args_gamma <- list(nobs = nobs, K = 15, nsims = nsims, dist_types=list(shape="gamma", a="normal")) +datasets_gamma <- generate_datasets(SBC_generator_function(generator_gr, lambda_init_gamma, fixed_args_gamma), n_datasets = nsims) +# combines target hp values with other hyperparameter settings +calib_generator <- function(lambda_init_gamma, fixed_args){ + generate_datasets(SBC_generator_function(generator_gr, lambda_init_gamma, fixed_args), n_datasets = fixed_args_gamma$nsims) +} +#rstan_mod_gr <- stan_model("models/gamma-reg.stan") +cmdstan_mod_gr <- cmdstanr::cmdstan_model("models/gamma-reg.stan") +# backend_gr_opt <- SBC_backend_rstan_optimizing(rstan_mod_gr, draws = ndraws) +# OPT DOES NOT WORK: result in Error in chol.default(-H) : the leading minor of order 13 is not positive definite + +backend_gr_hmc <- SBC_backend_cmdstan_sample(cmdstan_mod_gr, chains = 4, iter_sampling = ndraws / 4) # thin = 10 + +# initial badly calibrated +result_gr_hmc <- compute_results(datasets_gamma, backend_gr_hmc) +plot_rank_hist(result_gr_hmc) +``` + + + +```{R, warning = FALSE, error = FALSE} +calculate_dap <- function(mu, var, generator, backened, fixed_args){ + lambda_init_gamma <- list( + shape = list(alpha= mu^2 / var, beta= mu / var), + a = list(mu=2, var=1) + ) + datasets <- do.call(generator, list(lambda_init_gamma, fixed_args = 
fixed_args)) + sbc_result <- compute_results(datasets, backened, thin_ranks = 1) + draws_eta <- c() + for(fit in sbc_result$fits){ + samples <- SBC_fit_to_draws_matrix(fit) + draws_eta <- c(draws_eta, posterior::extract_variable(samples, "shape")) + } + # assume normal for dap + # mu <- mean(draws_eta) + # var <- sd(draws_eta)^2 + gamma_est <- MASS::fitdistr(draws_eta, "gamma", start=list(shape=1, rate=1))$estimate + alpha <- as.numeric(gamma_est["shape"]) + beta <- as.numeric(gamma_est["rate"]) + mu = alpha / beta + var = alpha / beta^2 + return(list(mu=mu, var=var, draws_eta=draws_eta)) +} + +prior_dap <- list(mu = c(), var = c(), dap_mu = c(), dap_var = c(), mu_loss = c(), var_loss = c()) +for (mu in seq(.1, .5, length.out = 4)){ + for(var in seq(.1, .5, length.out = 4)){ + prior_dap$mu <- c(prior_dap$mu, mu) + prior_dap$var <- c(prior_dap$var, var) + dap <- calculate_dap(mu, var, calib_generator, backend_gr_opt, fixed_args_gr) + prior_dap$dap_mu <- c(prior_dap$dap_mu, dap$mu) + prior_dap$dap_var <- c(prior_dap$dap_var, dap$var) + prior_dap$mu_loss <- c(prior_dap$mu_loss, mu - dap$mu) + prior_dap$var_loss <- c(prior_dap$var_loss, var - dap$var) + } +} +prior_dap <- data.frame(prior_dap) +ggplot(prior_dap)+ geom_point(aes(x=mu, y=var), color = "red") + geom_point(aes(x=dap_mu, y = dap_var), color = "blue") +ggplot(prior_dap_b) + geom_density_2d(aes(x=mu, y = sigma)) + geom_density_2d(aes(x=dap_mu, y = dap_sigma), color = "red") +library(plot3D) +persp3D(x=prior_dap$dap_mu, y=sigma_vals, z = convex_v, theta=30, phi=50, xlab="mu", ylab="var", zlab="convex") +``` + +We now run self-calibration + +```{R, warning=FALSE, error=FALSE} +knitr::opts_chunk$set(error = TRUE) +sc_opt_gr <- self_calib_adaptive(calib_generator, backend_gr_hmc, updator, c("shape", "a"), lambda_init_gamma, nsims, gamma, tol, fixed_args = fixed_args_gamma) +``` + +### Iteration details: + +```{R, warning=FALSE, error=FALSE} +sc_opt_gr$t_df +``` + +```{R, warning=FALSE, error=FALSE} +# after calibration +datasets_gr_new <- generate_datasets(SBC_generator_function(generator_gr, sc_opt_gr$lambda, fixed_args_gamma), n_datasets = fixed_args_gamma$nsims) +result_gr_hmc_new <- compute_results(datasets_gr_new, backend_gr_hmc) + +plot_rank_hist(result_gr_hmc_new) +ggplot(sc_opt_gr$t_df) + geom_point(aes(x=iter, y=shape_lambda_loss)) +``` diff --git a/vignettes/self-calibration.rmd b/vignettes/self-calibration.rmd new file mode 100644 index 0000000..fbcb3e0 --- /dev/null +++ b/vignettes/self-calibration.rmd @@ -0,0 +1,371 @@ +--- +title: "self-calibration" +author: "Hyunji Moon, Shinyoung Kim" +output: + html_document: default + pdf_document: default +--- +```{r setup, include=FALSE, warning=FALSE} +library(SBC) +library(cmdstanr) +library(parallel) +library(bayesplot) +library(posterior) +library(dplyr) +library(rstan) +library(future) +library(ggpubr) +library(rstanarm) +library(ggplot2) +options(mc.cores = parallel::detectCores()) +plan(multisession) +options(SBC.min_chunk_size = 5) +set.seed(1984) +``` + +Definition: + +- simulation prior := prior distribution that informs the prior predictive simulations and is being iteratively updated + +- inference prior := prior distribution that informs the posterior distribution, and hence the posterior sampling method + +- prior predictive distribution $P(y)$:= marginal distribution of $y, p(y)$ + +- data-averaged posterior $P(\theta')$:= combined posterior samples from each datasets + +- posterior sampling method := a.k.a. 
inference algorithm; its output distribution is a function of the prior predictive distribution and the inference prior
+
+- default prior of a chosen likelihood and posterior sampling method := a prior that is wide enough not to hurt self-consistency
+
+- SBC™ := the original SBC diagnostic from the Talts et al. paper; we use its results to compare calibration before and after the procedure.
+
+The prior predictive distribution is determined by two components: the prior distribution and the likelihood.
+$$p(y_1,\dots,y_n) = \int \prod_{i=1}^{n} f(y_i|\theta)\,p(\theta)\,d\theta$$
+
+Let us denote the likelihood and the posterior sampling method as $F$ and $G^{-1}$, respectively. The inverse notation reflects that most posterior sampling methods use the likelihood "in reverse", minimizing the distance between the target and the generated distribution. In the SBC package, the approximation used for the predictive distribution (e.g. a `glm` formula) is what we call a backend. See the [implementing_backends](https://hyunjimoon.github.io/SBC/articles/implementing_backends.htmls) vignette for details.
+
+![F, G_t^{-1}, Regularizer determine the outcome of converged distribution, which we call default prior.](iter_overview.png)
+
+Three steps are involved:
+
+1. `generator`: generate parameters and outcomes from the prior $\pi_{\lambda_n}(\theta)$ and the likelihood $f(y|\theta)$
+2. `backend`: sample $\theta$ given the prior, the generated outcome, and the posterior sampling method
+3. `self_calib`: update the hyperparameter $\lambda$ based on the difference between the prior and the data-averaged posterior
+
+In step 2, for inference algorithms that return point estimates of hyperparameters instead of parameter samples (e.g. the Laplace approximation or VI), we can turn them into a posterior sampling method by generating samples with $\hat{\lambda}$.
+
+Proposition: Iterating prior predictive simulation, posterior sampling, and regularization converges to a default prior for a given prior distribution family, likelihood, and inference algorithm.
+
+This is due to the recurrence of well-calibrated regions, which will be illustrated in experiment 1: different priors converge to the same distribution for a simple Bernoulli likelihood with the Laplace approximation as the inference algorithm. This approximation truncates the Taylor expansion of the log target density at the mode to second order, i.e. $\mu_{t+1} = \operatorname{argmax}_w f(w)$ and $\sigma_{t+1}^{2} = \left(-\frac{d^{2}}{d w^{2}} \log f(w)\Big|_{w=w_{0}}\right)^{-1}$, where $w_0 = \mu_{t+1}$ is the mode.
+
+Below are general settings for our experiments. One critical requirement for SBC™ is that the observational space has to match the observational space of interest. Not seeing diagnostic problems for posterior computation with `nobs` = 10 component observations doesn't mean that the same computational method will also work for `nobs` = 100 component observations or vice versa.
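+
+(Before moving to the general settings, here is a small self-contained 1-D illustration of the Laplace update described above. This is only an illustrative sketch - it is not the package's optimizing backend, and the helper `laplace_1d` is a hypothetical name used just for this example.)
+
+```{r laplace_sketch}
+# Approximate a density by a normal centered at its mode, with variance equal to
+# the inverse of the negative curvature of the log density at the mode.
+laplace_1d <- function(log_dens, lower, upper) {
+  opt <- optimize(log_dens, c(lower, upper), maximum = TRUE)
+  mode <- opt$maximum
+  h <- 1e-4  # finite-difference step for the second derivative
+  curv <- (log_dens(mode + h) - 2 * log_dens(mode) + log_dens(mode - h)) / h^2
+  list(mu = mode, sigma = sqrt(-1 / curv))
+}
+
+# Example: Laplace approximation to a Beta(3, 5) density on (0, 1)
+laplace_1d(function(p) dbeta(p, 3, 5, log = TRUE), lower = 1e-6, upper = 1 - 1e-6)
+```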
+```{R} +## Generator settings +# number of SBC simulations per iteration (generator) +nsims <- 80 + +# number of observations +nobs <- 2 + +# link function (1 = logit, 2 = probit, 3 = cloglog) +link <- 1 + +# number of binomial trials per observation +nsize <- 10 + +## Backend settings +# number of draws per posterior approximation +ndraws <- 100 + +# number of chains for hmc posterior approximation +nchains <- 2 +``` + +Received comments: + - using random variable as X notation could help us learn the characteristic of operations $y = I(U \leq p(x)), U \sim Unif(0,1), p(x) = \frac{1}{1+ e^{-x}}$ + - $\mathcal{T} \pi = \pi$ is the equation we wish to solve where $\mathcal{T} \pi:=\iint g(\theta \mid y, f, \pi) f\left(y \mid \theta^{\prime}\right) \pi\left(\theta^{\prime}\right) d y d \theta^{\prime}$. Parameterizing $\pi$, the goal changes to finding $\|\mathcal{T} \pi - \pi\| \leq \epsilon$ as it is hard to expect data-averaged posterior to be within parameterized family. For instance, data-averaged posterior from normal prior would not be normal in general. For brevity, $g_\lambda(\theta \mid y) := g(\theta \mid y, f, \pi_\lambda)$ and using a squared loss, then $\mathcal{T} \pi_{\lambda}=\iint g_{\lambda}(\theta \mid y) f\left(y \mid \theta^{\prime}\right) \pi_{\lambda}\left(\theta^{\prime}\right) d y d \theta^{\prime}$ and the self-calibration could be designed in the direction of $\frac{dZ_{\lambda}}{d\lambda}$ where $Z_{\lambda} := \int (\mathcal{T}\pi_{\lambda}(\theta) - \pi_\lambda(\theta))^2\pi^p(\theta)d\theta$. Note that calculating gradient is easier when the penalty weights are fixed which is why $\pi_p$ is used instead of $\pi_\lambda$. $pi^p$ is a penalizing prior on which calibration loss is averaged over in order to update lambda. + +Then$\frac{dZ_{\lambda}}{d\lambda} = E[(\pi_\lambda(\theta))(\partial_\lambda\pi_{\lambda}(\theta))]$ where the expectation is + +# PASSIVE UPDATE +# TODO once SBC_backend supports rstan optimizing. +# Experient 1. 1.Normal simulation prior (samples: 100), 2.Bernouli-logit prior predictive simultation, 3. Laplace approximation posterior sampling (draws: 100) 4. mean and sd for DAP summary and use it for the next prior parameter + +Target parameter is logit-transformed probability, $\eta$. Binomial likelihood and laplace approximation inference algorithm on logit scale is used. Hyperparameters for laplace approximation are $\mu, \sigma$ which correspond to posterior distribution mode and second derivative at the mode. These hyperparameter values are set as the prior parameter for the iteration. Results show starting from $N(0, 3^2)$ distribution, initial non-normal distribution slowly transforms to normal form to adjust to the constraints imposed by the approximation of inference algorithm, in this case normal distribution. Final convergence is around $N(0, 0.5^2)$. + +Could we make this convergence to the well-calibrated prior more efficient, considering usecases with many calibration-target parameters? + +# ACTIVE UPDATE +# TODO once SBC_backend supports rstan optimizing. +# Experient 2. 1.Normal simulation prior (samples: 100), 2.Bernouli-logit prior predictive simultation, 3. Laplace approximation posterior sampling (draws: 100) 4. coupling+gradient update based on moment-matching DAP summary (mean, sd) + +# Experient 3. 1. Normal simulation prior (samples: 100), 2.Gamma-glm prior predictive simultation, 3. HMC posterior sampling (draws: 100) 4. 
coupling+gradient update based on moment-matching DAP summary (mean, sd) + +```{r, warning=FALSE, error=FALSE} +# step1: inferring posterior +generator_binom <- function(lambda_mu, lambda_sigma, fixed_args){ + # fixed value across simulated datasets + # experiment settings + nobs <- fixed_args$nobs + nsize <- fixed_args$nsize + # modular settings + link_type <- fixed_args$link_type + + # generate + eta <- rnorm(1, mean = lambda_mu, sd=lambda_sigma) + mu <- invtf_param_vec(eta, link_type = link_type) + Y <- rbinom(nobs, size = nsize, prob = mu) + list( + parameters = list(eta = eta), + generated = list(nobs= nobs, nsize = nsize, link = link_type, + lambda_mu = lambda_mu, lambda_log_sigma = log(lambda_sigma), + Y = Y) + ) +} +fixed_args_binom <- list(nobs = nobs, nsize = nsize, link_type = 1, nsims = nsims, ndraws = ndraws) + +# initial prior hyperparameters +lambda_mu <- 1 +lambda_sigma <- 10 + +datasets_binom <- generate_datasets(SBC_generator_function(generator_binom, lambda_mu, lambda_sigma, fixed_args_binom), n_datasets = fixed_args_binom$nsims) +``` + +```{R} +## Self-calib settings + +# hyperparameter update algorithm +updator = "gradient" + +# maximal number of SBC iterations +niter <- 100 + +# tolerance +tol <- 0.02 + +# learning rate +gamma <- 0.5 + +# step2: inferring posterior +rstan_binom_mod <- stan_model("models/binom-laplace.stan") +cmdstan_binom_mod <- cmdstanr::cmdstan_model("models/binom-laplace.stan") + +backend_binom_opt <- SBC_backend_rstan_optimizing(rstan_binom_mod, draws = ndraws) +backend_binom_hmc <- SBC_backend_cmdstan_sample(cmdstan_binom_mod, chains = 4, iter_sampling = ndraws / 4) # thin = 10 + +# initial badly calibrated +result_binom_opt <- compute_results(datasets_binom, backend_binom_opt, thin_ranks = 1) +plot_rank_hist(result_binom_opt) +# result_binom_hmc <- compute_results(datasets_binom, backend_binom_hmc) + +# step3: updating hyperparmeters +# wrapper function to follow `self_calib_adaptive` interface +calib_generator <- function(lambda_mu, lambda_sigma, fixed_args){ + generate_datasets(SBC_generator_function(generator_binom, lambda_mu, lambda_sigma, fixed_args), n_datasets = fixed_args$nsims) +} +sc_opt <- self_calib_adaptive(calib_generator, backend_binom_opt, updator, "eta", lambda_mu, lambda_sigma, nsims, tol, fixed_args = fixed_args_binom) +sc_hmc <- self_calib_adaptive(calib_generator, backend_binom_hmc, updator, "eta", lambda_mu, lambda_sigma, nsims, tol, fixed_args = fixed_args_binom) + +plot_rank_hist(sc_opt) +``` + +## 2. Gamma regression +HMC is underdispersed. ADVI is skewed to the right and has a tendency to under-estimate. 
+ +```{r warning=FALSE} +generator_gamma <- function(lambda_mu, lambda_sigma, fixed_args){ + # fixed value across simulated datasets + ## meta + nobs <- fixed_args$nobs + K <- fixed_args$K + shape <- fixed_args$shape + # predictor + X <- array(rnorm(nobs * K, mean = 1, sd = 1), dim = c(nobs, K)) + b <- rnorm(K, mean = 0, sd = 1) + # generate + eta <- rnorm(1, mean = lambda_mu, sd=lambda_sigma) + logmu <- as.numeric(eta+ X %*% b) + mu <- exp(logmu) + Y <- rgamma(nobs, shape = shape, scale = mu/shape) + list( + parameters = list(eta = eta), + generated = list(nobs= nobs, K = K, X = X, shape = shape, + lambda_mu = lambda_mu, lambda_log_sigma = log(lambda_sigma), + Y = Y) + ) +} + +# prior hyperparameters +lambda_mu <- 2 +lambda_sigma <- 5 + +fixed_args_gamma <- list(nobs = nobs, K = 15, shape = 1, nsims = nsims) +datasets_gamma <- generate_datasets(SBC_generator_function(generator_gamma, lambda_mu, lambda_sigma, fixed_args_gamma), n_datasets = nsims) + +# step2: posterior sampling +mod_gr <- cmdstanr::cmdstan_model("models/gamma-reg.stan") +rstan_mod_gr <- stan_model("models/gamma-reg.stan") +backend_gamma_hmc <- SBC_backend_cmdstan_sample(mod_gr, chains = 4, iter_sampling = ndraws / 4) # thin = 10 +backend_gamma_opt <- SBC_backend_rstan_optimizing(rstan_mod_gr, draws = ndraws) +#result_25_hmc <- compute_results(datasets_gamma, backend_hmc, thin_ranks = 1) + +result_25_opt <- compute_results(datasets_binom, backend_binom_opt, thin_ranks = 1) +result_25_hmc <- compute_results(datasets_binom, backend_gamma_hmc, thin_ranks = 1) +plot_rank_hist(result_25_hmc) +plot_rank_hist(result_25_opt) +``` + + +```{r setup, include=FALSE} +## Self-calib settings +# initial prior hyperparameters +lambda_mu <- 2 +lambda_sigma <- 5 + +# hyperparameter update algorithm +updator = "heuristic" + +# maximal number of SBC iterations +niter <- 100 + +# tolerance +tol <- 0.02 + +# learning rate +gamma <- 0.5 + +# step3: updating hyperparmeters +# wrapper function to follow `self_calib_adaptive` interface +calib_generator <- function(lambda_mu, lambda_sigma, fixed_args){ + generate_datasets(SBC_generator_function(generator_gamma, lambda_mu, lambda_sigma, fixed_args), n_datasets = fixed_args$nsims) +} +param_sc_hmc <- self_calib_adaptive(calib_generator, backend_hmc, updator, "eta", lambda_mu, lambda_sigma, nsims, tol, list(fixed_args = fixed_args_binom)) +plot_rank_hist(param_sc_hmc) + +backend_vi <- SBC_backend_cmdstan_variational(mod_gr, output_samples = ndraws, algorithm = "fullrank") + +result_25_vi <- compute_results(datasets_gamma, backend_vi, thin_ranks = 1) +plot_rank_hist(result_25_vi) +param_sc_vi <- self_calib_adaptive(generator_gamma, cmdstan_backend_vi, "eta", lambda_mu, lambda_sigma, nsims, tol, fixed_args = list(fixed_args = fixed_args)) +plot_rank_hist(param_sc_vi) + + +library(SBC) #devtools::install_github("hyunjimoon/SBC") +library(cmdstanr) +library(parallel) +library(bayesplot) +library(posterior) +library(dplyr) +library(future) +library(ggpubr) +library(mclust) +library(rstanarm) +options(mc.cores = parallel::detectCores()) +plan(multisession) +options(SBC.min_chunk_size = 5) +set.seed(1984) +devtools::load_all() +``` + +```{r echo=FALSE, message=FALSE, warning=FALSE} +knitr::opts_chunk$set( + include = TRUE, cache = FALSE, collapse = TRUE, echo = TRUE, + message = FALSE, tidy = FALSE, warning = FALSE, comment = " ", + dev = "png", dev.args = list(bg = '#FFFFF8'), dpi = 300, + fig.align = "center", fig.width = 7, fig.asp = 0.618, fig.show = "hold", + out.width = "90%") +``` + +```{r} +#_poisson, 
_logistic with transform_type = "log", / family +generator_gmm <- function(mixture_means, mixture_sds, fixed_sim_args){ + # fixed value across simulated datasets + ## meta + nobs <- fixed_sim_args$nobs + ndraws <- fixed_sim_args$ndraws + transform_types <- fixed_sim_args$transform_types + link_type <- fixed_sim_args$link_type + ## distribution-specific + shape <- fixed_sim_args$shape + + # predictor + if("X" %in% names(fixed_sim_args)) {X = fixed_sim_args$X} else X = 0 + # parameter with fixed distribution across `nsims` datasets + if("b" %in% names(fixed_sim_args)) {b <- fixed_sim_args$b} else b = 0 + # target variable updated at each iteration + a <- invtf_param_vec(rvar_rng(rnorm, n = 1, mean = sample(mixture_means$a, nsims, replace=TRUE), sd=mixture_sds$a, ndraws = nsims), tf = transform_types$a) + + # generate + mu = draws_of(a + X %**% b) + if(link_type == "log"){ + mu = exp(mu) + }else if(link_type == "logit"){ + mu <- invlogit(mu) + } + Y <- rvar_rng(rbinom, n = nobs, size = nsize, prob = mu, ndraws = nsims) + if(any(is.na(Y))) print(mu) + gen_rvars <- draws_rvars(nsims = nsims, nobs = nobs, nsize = nsize, + mixture_means = mixture_means$a, mixture_sds = mixture_sds$a, + Y = Y) + SBC_datasets( + parameters = as_draws_matrix(list(a = a)), + generated = draws_rvars_to_standata(gen_rvars) + ) +} +nsims = 30 +nobs = 100 +ndraws = 1000 +nsize = 2 +npredictors = 15 +ntarget_params = 1 +chains = 4 +transform_types = list(a = "identity") # parameter constraint in stan translated to generator (e.g. bound, simplex) +fixed_sim_args <- list(nobs = nobs, ndraws = ndraws, nsize = nsize, link_type = "logit", transform_types = transform_types, shape = 1, b = rvar_rng(rnorm, ndraws = 1, n = npredictors, 0, 1), X = rvar(array(rnorm(n = nobs * npredictors, mean = 1, sd = 1), dim = c(1, nobs, npredictors)))) + +# proxy for target variable +mixture_means = draws_rvars(a = rvar(array(rep(rnorm(nsims, 2, 5), each = nsims), dim = c(nsims, nsims)))) +mixture_sds = draws_rvars(a = rvar(array(rep(1, nsims), dim=c(nsims, 1)))) + +datasets_25 <- generator_gmm( + mixture_means = mixture_means, + mixture_sds = mixture_sds, + fixed_sim_args = fixed_sim_args +) + +#mod_gmm <- cmdstanr::cmdstan_model("./models/gamma-reg_gmm.stan") +mod_gmm <- cmdstanr::cmdstan_model("./models/binom-laplace_gmm.stan") +backend_hmc_gmm <- SBC_backend_cmdstan_sample(mod_gmm, chains = chains, iter_sampling = ndraws / chains) +``` + +Starting from a bad SBC plot (first), better SBC plot (second) is found after three iterations. +```{r} +result_25_hmc <- compute_results(datasets_25, backend_hmc_gmm, thin_ranks = 3) +plot_rank_hist(result_25_hmc) + +# self-calibrate +param_sc_hmc <- self_calib(generator_gmm, backend_hmc_gmm, mixture_means, mixture_sds, nsims_fn = function(...){nsims}, thin = 3, transform_types = transform_types, fixed_generator_args = list(fixed_sim_args = fixed_sim_args)) +plot_rank_hist(param_sc_hmc) +``` + +For ADVI, $N(2,5^2)$ for the coefficient breaks in every case and therefore $N(2,1^2)$ is used. Notice as the self-consistent parameter region is a function of a generator and inference engine; prior $N(2,5^2)$ or $N(2,1^2)$ is simply used as an initial distribution. It remains to be seen whether different initial distribution converge to near enough region (near uniqueness). 
+ +```{r} +mixture_means_21 = draws_rvars(a = rvar(array(rep(rnorm(nsims, 2, 1), each = nsims), dim = c(nsims, nsims)))) +datasets_21 <- generator_gmm( + mixture_means = mixture_means_21, + mixture_sds = mixture_sds, + fixed_values = fixed_values +) + +backend_vi <- SBC_backend_cmdstan_variational(mod_gmm, output_samples = ndraws, algorithm = "fullrank") +result_21_vi <- compute_results(datasets_21, backend_vi, thin_ranks = 1) +plot_rank_hist(result_21_vi) + +# self-calibrate +param_sc_vi <- self_calib(generator_gmm, backend_vi, mixture_means_21, mixture_sds, nsims_fn = function(...){nsims}, thin = 1, fixed_generator_args = list(fixed_values = fixed_values, )) +plot_rank_hist(param_sc_vi) +``` + +```{r} +plot_ecdf_diff(param_sc_vi) +plot_ecdf_diff(result_21_vi) +``` diff --git a/vignettes/small_model_workflow.Rmd b/vignettes/small_model_workflow.Rmd index a130404..561c1a3 100644 --- a/vignettes/small_model_workflow.Rmd +++ b/vignettes/small_model_workflow.Rmd @@ -25,7 +25,7 @@ The workflow described here focuses on small models. "Small" means that the model is relatively fast to fit and we don't have to worry about computation too much. Once running ~100 fits of the model becomes too costly, there are additional tricks and considerations that we hope to delve into in a "Building a complex model" vignette (which currently doesn't exist). -Still many of the approaches here also apply to complex models (especially starting small and building each component separately), and with proper separation of the model into components, +Still many of the approaches here also apply to complex models (especially starting small and building smaller submodels separately), and with proper separation of the model into submodels, one can validate big chunks of Stan code while working with small models only. We expect the reader to be familiar with basics of the package. If not, @@ -47,16 +47,16 @@ those covariates and the prevalence of either subspecie. ## Big picture -This model naturally decomposes into two components: +This model naturally decomposes into two submodels: -1) the mixture component where the mixing ratio is the same -for all variables +1) the mixture submodel where the mixing ratio is the same +for all observations 2) a beta regression where we take covariates and make a prediction of a probability, assuming we (noisily) observe the probability. It is good practice to start small and implement and validate each of those -components separately and then put them together and validate the bigger model. +submodels separately and then put them together and validate the bigger model. This makes is substantially easier to locate bugs. 
You'll notice that the process ends up involving a lot of steps, but the fact is that we still ignore all the completely invalid models I @@ -71,14 +71,15 @@ Let's setup and get our hands dirty: ```{r setup, message=FALSE,warning=FALSE, results="hide"} library(SBC) -use_cmdstanr <- TRUE # Set to false to use rstan instead +use_cmdstanr <- getOption("SBC.vignettes_cmdstanr", TRUE) # Set to false to use rstan instead + +if(use_cmdstanr) { + library(cmdstanr) +} else { + library(rstan) + rstan_options(auto_write = TRUE) +} -# if(use_cmdstanr) { -# library(cmdstanr) -# } else { -# library(rstan) -# } -library(cmdstanr) library(bayesplot) library(posterior) @@ -88,20 +89,24 @@ plan(multisession) options(SBC.min_chunk_size = 5) # Setup caching of results -cache_dir <- "./small_model_worklow_SBC_cache" +if(use_cmdstanr) { + cache_dir <- "./_small_model_worklow_SBC_cache" +} else { + cache_dir <- "./_small_model_worklow_rstan_SBC_cache" +} if(!dir.exists(cache_dir)) { dir.create(cache_dir) } ``` -## Mixture component +## Mixture submodel There is a good [guide to mixtures](https://mc-stan.org/docs/2_27/stan-users-guide/mixture-modeling-chapter.html) in the Stan user's guide. Following the user's guide would save us from a lot of mistakes, but for the sake of example, we will pretend we didn't really read it - and we'll see the problems can be discovered via simulations. -So this is our first try at implementing the mixture component: +So this is our first try at implementing the mixture submodel: ```{r, comment = ""} @@ -110,8 +115,13 @@ cat(readLines("small_model_workflow/mixture_first.stan"), sep = "\n") ```{r} -model_first <- cmdstan_model("small_model_workflow/mixture_first.stan") -backend_first <- SBC_backend_cmdstan_sample(model_first) +if(use_cmdstanr) { + model_first <- cmdstan_model("small_model_workflow/mixture_first.stan") + backend_first <- SBC_backend_cmdstan_sample(model_first) +} else { + model_first <- stan_model("small_model_workflow/mixture_first.stan") + backend_first <- SBC_backend_rstan_sample(model_first) +} ``` And this is our code to simulate data for this model: @@ -132,7 +142,7 @@ generator_func_first <- function(N) { } list( - parameters = list( + variables = list( mu1 = mu1, mu2 = mu2, theta = theta @@ -147,7 +157,7 @@ generator_func_first <- function(N) { generator_first <- SBC_generator_function(generator_func_first, N = 50) ``` -Let's start with just a single dataset: +Let's start with just a single simulation: ```{r} set.seed(68455554) @@ -155,7 +165,7 @@ datasets_first <- generate_datasets(generator_first, 1) ``` ```{r} -results_first <- compute_results(datasets_first, backend_first, +results_first <- compute_SBC(datasets_first, backend_first, cache_mode = "results", cache_location = file.path(cache_dir, "mixture_first")) ``` @@ -163,7 +173,11 @@ results_first <- compute_results(datasets_first, backend_first, Oh, we have convergence problems, let us examine the pairs plots ```{r mixture_first_convergence} -mcmc_pairs(results_first$fits[[1]]$draws()) +if(use_cmdstanr) { + mcmc_pairs(results_first$fits[[1]]$draws()) +} else { + mcmc_pairs(results_first$fits[[1]]) +} ``` One thing that stands out is that either `mu1` is tightly determined and `mu2` is allowed the full prior range or the other way around. We also don't learn anything about theta. 
@@ -181,14 +195,20 @@ cat(readLines("small_model_workflow/mixture_fixed_log_mix.stan"), sep = "\n") ``` ```{r} -model_fixed_log_mix <- cmdstan_model("small_model_workflow/mixture_fixed_log_mix.stan") -backend_fixed_log_mix <- SBC_backend_cmdstan_sample(model_fixed_log_mix) +if(use_cmdstanr) { + model_fixed_log_mix <- cmdstan_model("small_model_workflow/mixture_fixed_log_mix.stan") + backend_fixed_log_mix <- SBC_backend_cmdstan_sample(model_fixed_log_mix) +} else { + model_fixed_log_mix <- stan_model("small_model_workflow/mixture_fixed_log_mix.stan") + backend_fixed_log_mix <- SBC_backend_rstan_sample(model_fixed_log_mix) + +} ``` -So let's try once again with the same single dataset: +So let's try once again with the same single simulation: ```{r} -results_fixed_log_mix <- compute_results(datasets_first, backend_fixed_log_mix, +results_fixed_log_mix <- compute_SBC(datasets_first, backend_fixed_log_mix, cache_mode = "results", cache_location = file.path(cache_dir, "mixture_fixed_log_mix")) ``` @@ -199,7 +219,7 @@ No warnings this time. We look at the stats: results_fixed_log_mix$stats ``` -We see nothing obviously wrong, the posterior means are relatively close to simulated values (as summarised by the z-scores) - no parameter is clearly ridiculously misfit. So let's run a few more iterations. +We see nothing obviously wrong, the posterior means are relatively close to simulated values (as summarised by the z-scores) - no variable is clearly ridiculously misfit. So let's run a few more iterations. ```{r} set.seed(8314566) @@ -207,7 +227,7 @@ datasets_first_10 <- generate_datasets(generator_first, 10) ``` ```{r} -results_fixed_log_mix_2 <- compute_results(datasets_first_10, backend_fixed_log_mix, +results_fixed_log_mix_2 <- compute_SBC(datasets_first_10, backend_fixed_log_mix, cache_mode = "results", cache_location = file.path(cache_dir, "mixture_fixed_log_mix_2")) ``` @@ -222,7 +242,11 @@ hist(results_fixed_log_mix_2$stats$rhat) Let's examine a single pairs plot: ```{r mixture_fixed_log_mix_pairs} -mcmc_pairs(results_fixed_log_mix_2$fits[[1]]$draws()) +if(use_cmdstanr) { + mcmc_pairs(results_fixed_log_mix_2$fits[[1]]$draws()) +} else { + mcmc_pairs(results_fixed_log_mix_2$fits[[1]]) +} ``` We clearly see two modes in the posterior. And upon reflection, we can see why: swapping `mu1` with `mu2` while also changing `theta` for `1 - theta` gives _exactly_ the same likelihood - because the ordering does not matter. 
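It is easy to convince ourselves of this symmetry numerically. A small standalone sketch (the values below are made up for illustration, they are not taken from any of the fits above):

```{r}
# Mixture log likelihood as implied by the generator (both components have sd = 1)
mixture_loglik <- function(y, mu1, mu2, theta) {
  sum(log(theta * dnorm(y, mu1, 1) + (1 - theta) * dnorm(y, mu2, 1)))
}

y_example <- c(1.8, 2.5, 4.1, 3.3, 2.9)

# Swapping mu1 with mu2 while changing theta to 1 - theta gives exactly the same value
mixture_loglik(y_example, mu1 = 2, mu2 = 4, theta = 0.3)
mixture_loglik(y_example, mu1 = 4, mu2 = 2, theta = 0.7)
```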
A more detailed explanation of this type of problem is at https://betanalpha.github.io/assets/case_studies/identifying_mixture_models.html @@ -237,8 +261,13 @@ cat(readLines("small_model_workflow/mixture_fixed_ordered.stan"), sep = "\n") ```{r} -model_fixed_ordered <- cmdstan_model("small_model_workflow/mixture_fixed_ordered.stan") -backend_fixed_ordered <- SBC_backend_cmdstan_sample(model_fixed_ordered) +if(use_cmdstanr) { + model_fixed_ordered <- cmdstan_model("small_model_workflow/mixture_fixed_ordered.stan") + backend_fixed_ordered <- SBC_backend_cmdstan_sample(model_fixed_ordered) +} else { + model_fixed_ordered <- stan_model("small_model_workflow/mixture_fixed_ordered.stan") + backend_fixed_ordered <- SBC_backend_rstan_sample(model_fixed_ordered) +} ``` We also need to update the generator to match the new names and ordering constant: @@ -246,7 +275,7 @@ We also need to update the generator to match the new names and ordering constan generator_func_ordered <- function(N) { # If the priors for all components of an ordered vector are the same # then just sorting the result of a generator is enough to create - # a valid sample from the ordered vector + # a valid draw from the ordered vector prior mu <- sort(rnorm(2, 3, 1)) theta <- runif(1) @@ -260,7 +289,7 @@ generator_func_ordered <- function(N) { } list( - parameters = list( + variables = list( mu = mu, theta = theta ), @@ -274,7 +303,7 @@ generator_func_ordered <- function(N) { generator_ordered <- SBC_generator_function(generator_func_ordered, N = 50) ``` -We are kind of confident (and the model fits quickly), so we'll already start with 10 datasets. +We are kind of confident (and the model fits quickly), so we'll already start with 10 simulations. ```{r} set.seed(3785432) @@ -283,12 +312,12 @@ datasets_ordered_10 <- generate_datasets(generator_ordered, 10) ```{r} -results_fixed_ordered <- compute_results(datasets_ordered_10, backend_fixed_ordered, +results_fixed_ordered <- compute_SBC(datasets_ordered_10, backend_fixed_ordered, cache_mode = "results", cache_location = file.path(cache_dir, "mixture_fixed_ordered")) ``` -Now some fits still produce problematic Rhats or divergent transitions, let's browse the `$backend_diagnostics` (which contain Stan-specific diagnostic values) to see which datasets are causing problems: +Now some fits still produce problematic Rhats or divergent transitions, let's browse the `$backend_diagnostics` (which contain Stan-specific diagnostic values) to see which simulations are causing problems: ```{r} results_fixed_ordered$backend_diagnostics @@ -299,40 +328,44 @@ One of the fits has quite a lot of divergent transitions. Let's look at the pair ```{r mixture_fixed_ordered_pairs} problematic_fit_id <- 2 problematic_fit <- results_fixed_ordered$fits[[problematic_fit_id]] -mcmc_pairs(problematic_fit$draws(), np = nuts_params(problematic_fit)) +if(use_cmdstanr) { + mcmc_pairs(problematic_fit$draws(), np = nuts_params(problematic_fit)) +} else { + mcmc_pairs(problematic_fit, np = nuts_params(problematic_fit)) +} ``` There is a lot of ugly stuff going on. Notably, one can notice that the posterior of theta is bimodal, preferring either almost 0 or almost 1 - and when that happens, the mean of one of the components is almost unconstrained. Why does that happen? 
The key to the answer is in the simulated values for the component means: ```{r} -subset_draws(datasets_ordered_10$parameters, draw = problematic_fit_id) +subset_draws(datasets_ordered_10$variables, draw = problematic_fit_id) ``` -We were unlucky enough to simulate a dataset where both components have almost the same mean and thus we are actually looking at a dataset that is not really a mixture. Mixture models can misbehave badly in such cases (see once again the [case study by Mike Betancourt](https://betanalpha.github.io/assets/case_studies/identifying_mixture_models.html#5_singular_components_and_computational_issues) for a bit more detailed dive into this particular problem). +We were unlucky enough to simulate data where both components have almost the same mean and thus we are actually looking at data that is not really a mixture. Mixture models can misbehave badly in such cases (see once again the [case study by Mike Betancourt](https://betanalpha.github.io/assets/case_studies/identifying_mixture_models.html#5_singular_components_and_computational_issues) for a bit more detailed dive into this particular problem). ### Fixing degenerate components? What to do about this? Fixing the model to handle such cases gracefully is hard. But the problem is basically our prior - we want to express that (since we are fitting a two component model), we don't expect the means to be too similar. So if we can change our simulation to avoid this, we'll be able to proceed with SBC. If such a pattern appeared in real data, we would still have a problem, but we would notice thanks to the diagnostics. -This can definitely be done. But another way is to just ignore the datasets that had divergences for SBC calculations. It turns out that if we remove datasets in a way that only depends on the observed data (and not on unobserved parameters), the SBC identity is preserved and we can use SBC without modifications. The resulting check is however telling us something only for datasets that were not rejected. In this case this is not a big issue: if a fit had divergent transitions, we would not trust it anyway, so removing fits with divergent transitions is not such a big deal. +This can definitely be done. But another way is to just ignore the simulations that had divergences for SBC calculations. It turns out that if we remove simulations in a way that only depends on the observed data (and not on unobserved variables), the SBC identity is preserved and we can use SBC without modifications. The resulting check is however telling us something only for data that were not rejected. In this case this is not a big issue: if a fit had divergent transitions, we would not trust it anyway, so removing fits with divergent transitions is not such a big deal. For more details see the [`rejection_sampling`](https://hyunjimoon.github.io/SBC/articles/rejection_sampling.html) vignette. 
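Before subsetting it is worth checking how large a share of the simulations we are about to drop - if it was a big share, we would both waste a lot of computation and narrow the scope of the check considerably. A quick look, just reusing the backend diagnostics computed above:

```{r}
# Proportion and count of fits with at least one divergent transition
mean(results_fixed_ordered$backend_diagnostics$n_divergent > 0)
sum(results_fixed_ordered$backend_diagnostics$n_divergent > 0)
```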
So let us subset the results to avoid divergences: ```{r} -dataset_ids_to_keep <- - results_fixed_ordered$backend_diagnostics$dataset_id[ +sim_ids_to_keep <- + results_fixed_ordered$backend_diagnostics$sim_id[ results_fixed_ordered$backend_diagnostics$n_divergent == 0] # Equivalent tidy version if you prefer -# dataset_ids_to_keep <- results_fixed_ordered$backend_diagnostics %>% +# sim_ids_to_keep <- results_fixed_ordered$backend_diagnostics %>% # dplyr::filter(n_divergent == 0) %>% -# dplyr::pull(dataset_id) +# dplyr::pull(sim_id) -results_fixed_ordered_subset <- results_fixed_ordered[dataset_ids_to_keep] +results_fixed_ordered_subset <- results_fixed_ordered[sim_ids_to_keep] summary(results_fixed_ordered_subset) ``` @@ -360,7 +393,7 @@ coverage ```{r, echo=FALSE} theta_90_coverage_string <- paste0(round(100 * as.numeric( - coverage[coverage$parameter == "theta" & coverage$width == 0.9, c("ci_low","ci_high")])), + coverage[coverage$variable == "theta" & coverage$width == 0.9, c("ci_low","ci_high")])), "%", collapse = " - ") ``` @@ -378,7 +411,7 @@ You generally don't want to do this unless you are really short on memory, as it ```{r} set.seed(54987622) datasets_ordered_100 <- generate_datasets(generator_ordered, 100) -results_fixed_ordered_100 <- compute_results(datasets_ordered_100, backend_fixed_ordered, +results_fixed_ordered_100 <- compute_SBC(datasets_ordered_100, backend_fixed_ordered, keep_fits = FALSE, cache_mode = "results", cache_location = file.path(cache_dir, "mixture_fixed_ordered_100")) ``` @@ -386,17 +419,17 @@ results_fixed_ordered_100 <- compute_results(datasets_ordered_100, backend_fixed Once again we subset to keep only non-divergent fits - this also removes all the problematic Rhats and ESS. ```{r} -dataset_ids_to_keep <- - results_fixed_ordered_100$backend_diagnostics$dataset_id[ +sim_ids_to_keep <- + results_fixed_ordered_100$backend_diagnostics$sim_id[ results_fixed_ordered_100$backend_diagnostics$n_divergent == 0] # Equivalent tidy version -# dataset_ids_to_keep <- results_fixed_ordered_100$backend_diagnostics %>% +# sim_ids_to_keep <- results_fixed_ordered_100$backend_diagnostics %>% # dplyr::filter(n_divergent == 0) %>% -# dplyr::pull(dataset_id) +# dplyr::pull(sim_id) -results_fixed_ordered_100_subset <- results_fixed_ordered_100[dataset_ids_to_keep] +results_fixed_ordered_100_subset <- results_fixed_ordered_100[sim_ids_to_keep] summary(results_fixed_ordered_100_subset) ``` @@ -421,10 +454,10 @@ plot_coverage(results_fixed_ordered_combined) Note: it turns out that extending the model to more components becomes somewhat tricky as the model can become sensitive to initialization. Also the problems with data that can be explained by fewer components than the model assumes become more prevalent. -## Beta regression component +## Beta regression submodel -Let's move to the beta regression component of our model. After spending a bunch of time -implementing this, I realized, that maybe treating this as a logistic regression component would have been wiser (and sufficient). But I am gonna keep it in - it just demonstrates that a real workflow can be messy and let's us show some additional classes of problems and how they manifest in SBC. +Let's move to the beta regression submodel of our model. After spending a bunch of time +implementing this, I realized, that maybe treating this as a logistic regression submodel would have been wiser (and sufficient). 
But I am gonna keep it in - it just demonstrates that a real workflow can be messy and let's us show some additional classes of problems and how they manifest in SBC. Checking the wiki page for Beta distribution, we notice that it has two parameters, both bounded to be positive. So our first attempt at beta regression just creates two linear predictors - one for each parameter of the distribution. We then exponentiate the predictors to make them positive and we have a model: @@ -434,8 +467,13 @@ cat(readLines("small_model_workflow/beta_first.stan"), sep = "\n") ``` ```{r} -model_beta_first <- cmdstan_model("small_model_workflow/beta_first.stan") -backend_beta_first <- SBC_backend_cmdstan_sample(model_beta_first) +if(use_cmdstanr) { + model_beta_first <- cmdstan_model("small_model_workflow/beta_first.stan") + backend_beta_first <- SBC_backend_cmdstan_sample(model_beta_first) +} else { + model_beta_first <- stan_model("small_model_workflow/beta_first.stan") + backend_beta_first <- SBC_backend_rstan_sample(model_beta_first) +} ``` We also write a matching generator (microoptimization tip: I usually write Stan models first so that I can work on the generator code while the Stan model compiles): @@ -465,7 +503,7 @@ generator_func_beta_first <- function(N_obs, N_predictors) { } list( - parameters = list( + variables = list( beta = beta ), generated = list( @@ -480,11 +518,11 @@ generator_func_beta_first <- function(N_obs, N_predictors) { generator_beta_first <- SBC_generator_function(generator_func_beta_first, N_obs = 50, N_predictors = 3) ``` -One thing to note is that we add a rejection sampling step - we repeatedly generate datasets, +One thing to note is that we add a rejection sampling step - we repeatedly generate simulations, until we find one without `y` values very close to 1. Those can be problematic as they -can be rounded to 1 when the data for Stan is written to disk. And exact 1 is impossible with the Beta likelihood and the model will fail. Rejecting the dataset due to this criterion is quite rare and in fact, it does not threaten the validity of the SBC procedure (at least to the extent our real data also don't contain such extreme values) - for more details see the [`rejection_sampling`](https://hyunjimoon.github.io/SBC/articles/rejection_sampling.html) vignette. +can be rounded to 1 when the data for Stan is written to disk. And exact 1 is impossible with the Beta likelihood and the model will fail. Rejecting the simulation due to this criterion is quite rare and in fact, it does not threaten the validity of the SBC procedure (at least to the extent our real data also don't contain such extreme values) - for more details see the [`rejection_sampling`](https://hyunjimoon.github.io/SBC/articles/rejection_sampling.html) vignette. -We'll start with 10 datasets once again. +We'll start with 10 simulations once again. 
```{r} set.seed(3325488) @@ -494,7 +532,7 @@ datasets_beta_first <- generate_datasets(generator_beta_first, 10) ```{r} -results_beta_first_10 <- compute_results(datasets_beta_first, backend_beta_first, +results_beta_first_10 <- compute_SBC(datasets_beta_first, backend_beta_first, cache_mode = "results", cache_location = file.path(cache_dir, "beta_first_10")) ``` @@ -509,7 +547,11 @@ plot_ecdf_diff(results_beta_first_10) Let's inspect the pairs plot for the offending fit: ```{r beta_first_10_pairs} -mcmc_pairs(results_beta_first_10$fits[[3]]$draws()) +if(use_cmdstanr) { + mcmc_pairs(results_beta_first_10$fits[[3]]$draws()) +} else { + mcmc_pairs(results_beta_first_10$fits[[3]]) +} ``` This is a very crowded plot and it is hard to resolve details, but we see some correlations between the corresponding beta elements (e.g. `beta[1,1]` and `beta[2,1]`), let's have a closer look and show the same pairs plot for five of our fits: @@ -517,11 +559,16 @@ This is a very crowded plot and it is hard to resolve details, but we see some c ```{r beta_first_10_pairs_subset} for(i in 1:5) { fit <- results_beta_first_10$fits[[i]] - print(mcmc_pairs(fit$draws(), pars = c("beta[1,1]", "beta[2,1]","beta[1,2]", "beta[2,2]"))) + if(use_cmdstanr) { + pairs_input <- fit$draws() + } else { + pairs_input <- fit + } + print(mcmc_pairs(pairs_input, pars = c("beta[1,1]", "beta[2,1]","beta[1,2]", "beta[2,2]"))) } ``` -Turns out the correlations are in all fits, although sometimes they are relatively weak and the sampler is able to handle the posterior, it is potentially troubling. The main issue is that we plan to integrate this model with other components and problems that can be tolerated in a single component might interact with other components and make the model intractable. +Turns out the correlations are in all fits, although sometimes they are relatively weak and the sampler is able to handle the posterior, it is potentially troubling. The main issue is that we plan to integrate this model with other submodels and problems that can be tolerated in a single submodel might interact with other submodels and make the model intractable. We can even understand the reason for the positive correlation - it is because predicted means of our response beta distribution is the `exp(linpred[1,]) / ( exp(linpred[1,]) + exp(linpred[2,]))` vector. @@ -533,9 +580,9 @@ predictor values for the first shape parameter are allowed to vary quite a bit a ### Parametrizing the beta distribution via mean -The simplest way to resolve the issue with the correlations is to explicitly parametrize the beta distribution by its mean ($0 < \mu < 1$). The more common parametrization than adds a precision parameter ($\phi > 0$), so we then have $y \sim \mathrm{Beta}(\mu \phi, (1 - \mu) \phi)$ +The simplest way to resolve the issue with the correlations is to explicitly parametrize the beta distribution by its mean ($0 < \mu < 1$). The more common parametrization then adds a precision parameter ($\phi > 0$), so we then have $y \sim \mathrm{Beta}(\mu \phi, (1 - \mu) \phi)$ -This also makes much more sense for the bigger task - combining with the mixture component, as we really want to predict just a single probability. So we'll rewrite our predictors to predict only the logit of the mean (as in logistic regression) and keep the precision as a constant between observations. We could definitely also decide whether to keep the full flexibility and allow predictors for precision, we just don't do it here. 
+This also makes much more sense for the bigger task - combining with the mixture submodel, as we really want to predict just a single probability. So we'll rewrite our predictors to predict only the logit of the mean (as in logistic regression) and keep the precision as a constant between observations. We could definitely also decide whether to keep the full flexibility and allow predictors for precision, we just don't do it here. This is then our updated model: @@ -544,9 +591,14 @@ cat(readLines("small_model_workflow/beta_precision.stan"), sep = "\n") ``` ```{r} -model_beta_precision <- cmdstan_model("small_model_workflow/beta_precision.stan") -backend_beta_precision <- SBC_backend_cmdstan_sample(model_beta_precision) - +if(use_cmdstanr) { + model_beta_precision <- cmdstan_model("small_model_workflow/beta_precision.stan") + backend_beta_precision <- SBC_backend_cmdstan_sample(model_beta_precision) +} else { + model_beta_precision <- stan_model("small_model_workflow/beta_precision.stan") + backend_beta_precision <- SBC_backend_rstan_sample(model_beta_precision) + +} ``` And we need to update the generator to match: @@ -576,7 +628,7 @@ generator_func_beta_precision <- function(N_obs, N_predictors) { } list( - parameters = list( + variables = list( beta = beta, phi = phi ), @@ -594,7 +646,7 @@ generator_beta_precision <- ``` -Starting with 10 datasets: +Starting with 10 simulations: ```{r} set.seed(46988234) @@ -604,7 +656,7 @@ datasets_beta_precision_10 <- generate_datasets(generator_beta_precision, 10) ```{r} -results_beta_precision_10 <- compute_results(datasets_beta_precision_10, backend_beta_precision, +results_beta_precision_10 <- compute_SBC(datasets_beta_precision_10, backend_beta_precision, cache_mode = "results", cache_location = file.path(cache_dir, "beta_precision_10")) ``` @@ -621,7 +673,7 @@ So we'll run 90 more iterations and combine them with the previous results: ```{r} set.seed(2136468) datasets_beta_precision_90 <- generate_datasets(generator_beta_precision, 90) -results_beta_precision_90 <- compute_results( +results_beta_precision_90 <- compute_SBC( datasets_beta_precision_90, backend_beta_precision, keep_fits = FALSE, cache_mode = "results", cache_location = file.path(cache_dir, "beta_precision_90")) @@ -642,7 +694,7 @@ plot_rank_hist(results_beta_precision_100) plot_ecdf_diff(results_beta_precision_100) ``` -The plots don't look terrible, but the `beta[2]` and especially the `phi` parameter show slight problems. +The plots don't look terrible, but the `beta[2]` and especially the `phi` variable show slight problems. So we look back at our model code and note that we forgot to put any prior on `phi`! 
Mismatches in priors between the model and the simulator are unfortunately often not very well visible for SBC and can require a lot of simulations to discover (see the [`limits_of_SBC`](https://hyunjimoon.github.io/SBC/articles/limits_of_SBC.html) vignette for more detailed discussion) @@ -656,16 +708,22 @@ cat(readLines("small_model_workflow/beta_precision_fixed_prior.stan"), sep = "\n ``` ```{r} -model_beta_precision_fixed_prior <- - cmdstan_model("small_model_workflow/beta_precision_fixed_prior.stan") -backend_beta_precision_fixed_prior <- SBC_backend_cmdstan_sample(model_beta_precision_fixed_prior) +if(use_cmdstanr) { + model_beta_precision_fixed_prior <- + cmdstan_model("small_model_workflow/beta_precision_fixed_prior.stan") + backend_beta_precision_fixed_prior <- SBC_backend_cmdstan_sample(model_beta_precision_fixed_prior) +} else { + model_beta_precision_fixed_prior <- + stan_model("small_model_workflow/beta_precision_fixed_prior.stan") + backend_beta_precision_fixed_prior <- SBC_backend_rstan_sample(model_beta_precision_fixed_prior) +} ``` -And recompute for all 100 datasets at once (as we don't expect adding prior to introduce huge problems). +And recompute for all 100 simulations at once (as we don't expect adding prior to introduce huge problems). ```{r} results_beta_precision_fixed_prior <- - compute_results(datasets_beta_precision_100, backend_beta_precision_fixed_prior, + compute_SBC(datasets_beta_precision_100, backend_beta_precision_fixed_prior, keep_fits = FALSE, cache_mode = "results", cache_location = file.path(cache_dir, "beta_precision_fixed_prior")) ``` @@ -675,7 +733,7 @@ plot_rank_hist(results_beta_precision_fixed_prior) plot_ecdf_diff(results_beta_precision_fixed_prior) ``` -Diagnostic plots are looking good! So we add 100 more datasets: +Diagnostic plots are looking good! So we add 100 more simulations: ```{r} @@ -684,7 +742,7 @@ datasets_beta_precision_100b <- generate_datasets(generator_beta_precision, 100) results_beta_precision_fixed_prior_200 <- bind_results( results_beta_precision_fixed_prior, - compute_results(datasets_beta_precision_100b, backend_beta_precision_fixed_prior, + compute_SBC(datasets_beta_precision_100b, backend_beta_precision_fixed_prior, keep_fits = FALSE, cache_mode = "results", cache_location = file.path(cache_dir, "beta_precision_fixed_prior_2"))) ``` @@ -701,7 +759,7 @@ is in quite tight agreement with theory: plot_coverage(results_beta_precision_fixed_prior_200) ``` -So for now we are also happy about the beta regression component. +So for now we are also happy about the beta regression submodel. 
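As a numeric counterpart to the coverage plot, we can also tabulate the empirical coverage of a few central intervals via `empirical_coverage()` - just a quick sketch, the thing to check is that the observed proportions stay close to the nominal interval widths:

```{r}
empirical_coverage(results_beta_precision_fixed_prior_200$stats,
                   c(0.5, 0.8, 0.9, 0.95))
```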
## Putting it together @@ -712,8 +770,13 @@ cat(readLines("small_model_workflow/combined_first.stan"), sep = "\n") ``` ```{r} -model_combined <- cmdstan_model("small_model_workflow/combined_first.stan") -backend_combined <- SBC_backend_cmdstan_sample(model_combined) +if(use_cmdstanr) { + model_combined <- cmdstan_model("small_model_workflow/combined_first.stan") + backend_combined <- SBC_backend_cmdstan_sample(model_combined) +} else { + model_combined <- stan_model("small_model_workflow/combined_first.stan") + backend_combined <- SBC_backend_rstan_sample(model_combined) +} ``` And this is our generator for the full model: @@ -722,7 +785,7 @@ And this is our generator for the full model: generator_func_combined <- function(N_obs, N_predictors) { # If the priors for all components of an ordered vector are the same # then just sorting the result of a generator is enough to create - # a valid sample from the ordered vector + # a valid draw from the ordered vector prior mu <- sort(rnorm(2, 3, 1)) beta <- rnorm(N_predictors, 0, 1) @@ -749,7 +812,7 @@ generator_func_combined <- function(N_obs, N_predictors) { list( - parameters = list( + variables = list( beta = beta, mu = mu ), @@ -773,7 +836,7 @@ dataset_combined <- generate_datasets(generator_combined, 200) ``` ```{r} -results_combined <- compute_results(dataset_combined, backend_combined, +results_combined <- compute_SBC(dataset_combined, backend_combined, keep_fits = FALSE, cache_mode = "results", cache_location = file.path(cache_dir, "combined")) ``` @@ -792,9 +855,9 @@ As done previously, we could just exclude the fits that had divergences, but jus The general idea is that although we might not want to/be able to express our prior belief about the model (here that the two mixture components are distinct) by priors on model parameters, we still may be able to express our prior belief about the data itself. -And it turns out that if we remove datasets that don't meet a certain condition imposed on the observed data, the implied prior on parameters becomes an additive constant and we can use exactly the same model to fit only the non-rejected datasets. Note that this does not hold if we rejected datasets based on their parameter values - for more details see the [`rejection_sampling`](https://hyunjimoon.github.io/SBC/articles/rejection_sampling.html) vignette. +And it turns out that if we remove simulations that don't meet a certain condition imposed on the observed data, the implied prior on parameters becomes an additive constant and we can use exactly the same model to fit only the non-rejected simulations. Note that this does not hold if we rejected simulations based on some unobserved variables - for more details see the [`rejection_sampling`](https://hyunjimoon.github.io/SBC/articles/rejection_sampling.html) vignette. -The main advantage is that if we can do this, we can avoid wasting computation on fitting datasets that would likely produce divergences anyway. The downside is that it means we no longer have a guarantee the model works for non-rejected datasets, so we need to check if the data we want to analyze would not be rejected by our criterion. +The main advantage is that if we can do this, we can avoid wasting computation on fitting data that would likely produce divergences anyway. The downside is that it means we no longer have a guarantee the model works for non-rejected data, so we need to check if the data we want to analyze would not be rejected by our criterion. How to build such a criterion here? 
We'll note that for Poisson-distributed variables the ratio of mean to variance (a.k.a the Fano factor) is always 1. So if the components are too similar, the data should resemble a Poisson distribution and have Fano factor of 1, while if the components are distinct the Fano factor will be larger. @@ -813,7 +876,7 @@ All the divergence are for low fano factors - this is the histogram of Fano fact hist(fanos[results_combined$backend_diagnostics$n_divergent > 0]) ``` -So what we'll do is that we'll reject any dataset with Fano factor < 1.5. In practice a simple way to implement this is to wrap our generator code in a loop and break from the loop only when the generated dataset meets our criteria (i.e. is not rejected). This is our code: +So what we'll do is that we'll reject any simulation where the observed data have Fano factor < 1.5. In practice a simple way to implement this is to wrap our generator code in a loop and break from the loop only when the generated data meet our criteria (i.e. is not rejected). This is our code: ```{r} generator_func_combined_reject <- function(N_obs, N_predictors) { @@ -823,7 +886,7 @@ generator_func_combined_reject <- function(N_obs, N_predictors) { repeat { # If the priors for all components of an ordered vector are the same # then just sorting the result of a generator is enough to create - # a valid sample from the ordered vector + # a valid draw from the ordered vector prior mu <- sort(rnorm(2, 3, 1)) beta <- rnorm(N_predictors, 0, 1) @@ -853,7 +916,7 @@ generator_func_combined_reject <- function(N_obs, N_predictors) { } list( - parameters = list( + variables = list( beta = beta, mu = mu ), @@ -870,7 +933,7 @@ generator_combined_reject <- SBC_generator_function(generator_func_combined_reject, N_obs = 50, N_predictors = 3) ``` -We'll once again fit our model to 200 datasets: +We'll once again fit our model to 200 simulations: ```{r} set.seed(44685226) @@ -880,7 +943,7 @@ dataset_combined_reject <- generate_datasets(generator_combined_reject, 200) ```{r} -results_combined_reject <- compute_results(dataset_combined_reject, backend_combined, +results_combined_reject <- compute_SBC(dataset_combined_reject, backend_combined, keep_fits = FALSE, cache_mode = "results", cache_location = file.path(cache_dir, "combined_reject")) ``` @@ -898,11 +961,11 @@ And our coverage is pretty tight: plot_coverage(results_combined_reject) ``` -Below we show the uncertainty for two parameters and some widths of central posterior intervals numerically: +Below we show the uncertainty for two variables and some widths of central posterior intervals numerically: ```{r results_combined_reject_coverage} stats_subset <- results_combined_reject$stats[ - results_combined_reject$stats$parameter %in% c("beta[1]", "mu[1]"),] + results_combined_reject$stats$variable %in% c("beta[1]", "mu[1]"),] empirical_coverage(stats_subset, c(0.25,0.5,0.9,0.95)) ``` @@ -914,7 +977,7 @@ set.seed(1395367854) dataset_combined_reject_more <- generate_datasets(generator_combined_reject, 300) results_combined_reject_more <- bind_results( results_combined_reject, - compute_results(dataset_combined_reject_more, backend_combined, + compute_SBC(dataset_combined_reject_more, backend_combined, keep_fits = FALSE, cache_mode = "results", cache_location = file.path(cache_dir, "combined_reject_more")) ) @@ -934,7 +997,7 @@ plot_coverage(results_combined_reject_more) ```{r} stats_subset <- results_combined_reject_more$stats[ - results_combined_reject_more$stats$parameter %in% c("beta[1]", "mu[2]"),] + 
results_combined_reject_more$stats$variable %in% c("beta[1]", "mu[2]"),] empirical_coverage(stats_subset, c(0.25,0.5,0.9,0.95)) ``` @@ -954,7 +1017,7 @@ that the posterior 90% interval for `beta[1]` excludes zero, i.e. that we learn ```{r} stats_beta1 <- results_combined_reject_more$stats[ - results_combined_reject_more$stats$parameter == "beta[1]",] + results_combined_reject_more$stats$variable == "beta[1]",] mean(sign(stats_beta1$q5) == sign(stats_beta1$q95)) ``` diff --git a/vignettes/stan/discrete_params1.stan b/vignettes/stan/discrete_vars1.stan similarity index 100% rename from vignettes/stan/discrete_params1.stan rename to vignettes/stan/discrete_vars1.stan