From 684ec037d992a70ddc834e63151f3d9d503b1c14 Mon Sep 17 00:00:00 2001 From: "Mattan S. Ben-Shachar" Date: Tue, 2 Sep 2025 22:21:00 +0300 Subject: [PATCH 01/36] new methods and cleaned up docs --- DESCRIPTION | 2 +- NAMESPACE | 14 +-- NEWS.md | 1 + R/bayesfactor-methods.R | 219 ++++++++++++++++++++++++++++++++++ R/bayesfactor_inclusion.R | 24 ++-- R/bayesfactor_models.R | 135 ++++++--------------- R/bayesfactor_parameters.R | 67 +++++------ R/bayesfactor_restricted.R | 70 ++++------- R/print.bayesfactor_models.R | 56 --------- R/si.R | 12 +- R/utils_bayesfactor.R | 33 ----- man/bayesfactor_inclusion.Rd | 35 +++++- man/bayesfactor_methods.Rd | 94 +++++++++++++++ man/bayesfactor_models.Rd | 83 ++++++++----- man/bayesfactor_parameters.Rd | 83 ++++++++----- man/bayesfactor_restricted.Rd | 64 +++++----- man/si.Rd | 45 ++++--- 17 files changed, 630 insertions(+), 407 deletions(-) create mode 100644 R/bayesfactor-methods.R delete mode 100644 R/print.bayesfactor_models.R create mode 100644 man/bayesfactor_methods.Rd diff --git a/DESCRIPTION b/DESCRIPTION index c6263c165..794b11c39 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Type: Package Package: bayestestR Title: Understand and Describe Bayesian Models and Posterior Distributions -Version: 0.17.0.1 +Version: 0.17.0.2 Authors@R: c(person(given = "Dominique", family = "Makowski", diff --git a/NAMESPACE b/NAMESPACE index 555dfe969..0cbe9866a 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -2,10 +2,7 @@ S3method(as.data.frame,bayestestR_mediation) S3method(as.data.frame,density) -S3method(as.double,bayesfactor_inclusion) -S3method(as.double,bayesfactor_models) -S3method(as.double,bayesfactor_parameters) -S3method(as.double,bayesfactor_restricted) +S3method(as.double,bayestestRBF) S3method(as.double,map_estimate) S3method(as.double,p_direction) S3method(as.double,p_map) @@ -17,16 +14,13 @@ S3method(as.list,bayestestR_eti) S3method(as.list,bayestestR_hdi) S3method(as.list,bayestestR_si) S3method(as.logical,bayesfactor_restricted) -S3method(as.matrix,bayesfactor_models) -S3method(as.matrix,bayesfactor_restricted) -S3method(as.numeric,bayesfactor_inclusion) -S3method(as.numeric,bayesfactor_models) -S3method(as.numeric,bayesfactor_parameters) -S3method(as.numeric,bayesfactor_restricted) +S3method(as.matrix,bayestestRBF) +S3method(as.numeric,bayestestRBF) S3method(as.numeric,map_estimate) S3method(as.numeric,p_direction) S3method(as.numeric,p_map) S3method(as.numeric,p_significance) +S3method(as.vector,bayestestRBF) S3method(as.vector,p_direction) S3method(bayesfactor_inclusion,BFBayesFactor) S3method(bayesfactor_inclusion,bayesfactor_models) diff --git a/NEWS.md b/NEWS.md index ef40e12de..3788b0879 100644 --- a/NEWS.md +++ b/NEWS.md @@ -4,6 +4,7 @@ * `as.matrix()` for `bayesfactor_restricted()`, to obtain a matrix of Bayes factors between all restricted models. +* New dedicated docs for Bayes factor methods `?bayesfactor_methods` ## Changes diff --git a/R/bayesfactor-methods.R b/R/bayesfactor-methods.R new file mode 100644 index 000000000..47404cb7e --- /dev/null +++ b/R/bayesfactor-methods.R @@ -0,0 +1,219 @@ +#' Methods for Bayes factors +#' +#' @param x,object Bayes factor object +#' +#' @return +#' - `as.numeric()` / `as.double()` / `as.vector()`: a numeric vector of (log) +#' Bayes factors. +#' - `as.logical()`: a logical data frame with a column for each +#' order-restricted hypothesis. +#' - `as.matrix()`: a square matrix of (log) Bayes factors, with rows as +#' denominators and columns as numerators. +#' - `update()`: an updated `bayesfactor_models` object. +#' +#' @section Interpreting Bayes Factors: +#' A Bayes factor greater than 1 can be interpreted as evidence against the +#' null, at which one convention is that a Bayes factor greater than 3 can be +#' considered as "substantial" evidence against the null (and vice versa, a +#' Bayes factor smaller than 1/3 indicates substantial evidence in favor of the +#' null-model). See also `effectsize::interpret_bf()`. +#' +#' @section Transitivity of Bayes factors: +#' For multiple inputs (models or hypotheses), the function will return multiple +#' Bayes factors between each model and _the same_ reference model (the +#' `denominator` or un-restricted model). However, we can take advantage of the +#' transitivity of Bayes factors - where if we have two Bayes factors for Model +#' _A_ and model _B_ against the _same reference model C_, we can obtain a Bayes +#' factor for comparing model _A_ to model _B_ by dividing them: +#' \cr\cr +#' \deqn{BF_{AB} = \frac{BF_{AC}}{BF_{BC}} = \frac{\frac{ML_{A}}{ML_{C}}}{\frac{ML_{B}}{ML_{C}}} = \frac{ML_{A}}{ML_{B}}} +#' \cr\cr +#' (Where _ML_ is the _marginal likelihood_.) +#' \cr\cr +#' A full matrix comparing all models can be obtained with `as.matrix()`. +#' +#' @section Prior and posterior considerations: +#' In order to correctly and precisely estimate Bayes factors, a rule of thumb +#' are the 4 P's: **P**roper **P**riors and **P**lentiful +#' **P**osteriors. +#' \cr\cr +#' For the computation of Bayes factors, the model priors must be proper priors +#' (at the very least they should be *not flat*, and it is preferable that they +#' be *informative*) (Note that by default, `brms::brm()` uses flat priors for +#' fixed-effects); Wide priors result in smaller marginal likelihoods, and thus +#' models with wider priors are trivially less likely than models with narrower +#' priors - where, at the extreme, that a model with completely flat priors is +#' infinitely less favorable than a point null model (this is called *the +#' Jeffreys-Lindley-Bartlett paradox*). Thus, you should only ever try (or want) +#' to compute a Bayes factor when you have an informed prior. +#' \cr\cr +#' Additionally, for models using MCMC estimation the number of posterior +#' samples needed for testing is substantially larger than for estimation (the +#' default of 4000 samples may not be enough in many cases). A conservative rule +#' of thumb is to obtain 10 times more samples than would be required for +#' estimation (_Gronau, Singmann, & Wagenmakers, 2017_). If less than 40,000 +#' samples are detected, a warning is issued. +#' +#' @rdname bayesfactor_methods +#' @name bayesfactor_methods +NULL + +## as.matrix ------------------------- + +#' @param log Return log(BF) (default), or BF values. +#' +#' @rdname bayesfactor_methods +#' @export +as.matrix.bayestestRBF <- function(x, log = TRUE, ...) { + if (inherits(x, "bayesfactor_restricted")) { + log_BFs <- c(0, x$log_BF) + models <- c("(Un-restricted)", x$Hypothesis) + bf_fun <- "bayesfactor_restricted()" + + } else if (inherits(x, "bayesfactor_models")) { + log_BFs <- x$log_BF + models <- x$Model + bf_fun <- "bayesfactor_models()" + } else { + insight::format_error("Cannot extract a Bayes factor matrix from this object.") + } + + out <- -outer(log_BFs, log_BFs, FUN = "-") + rownames(out) <- colnames(out) <- models + + if (!log) { + out <- exp(out) + } + + class(out) <- c("bayesfactor_matrix", class(out)) + attr(out, "log_BF") <- log + attr(out, "bf_fun") <- bf_fun + out +} + +#' @export +print.bayesfactor_matrix <- function(x, log = FALSE, ...) { + orig_x <- x + orig_log <- attr(x, "log_BF") + + # Format values + x <- unclass(x) + if (log) { + if (!orig_log) x <- log(x) + sgn <- sign(x) < 0 + x <- insight::format_value(abs(x), digits = 2, ...) + + if (any(sgn)) { + x[sgn] <- paste0("-", x[sgn]) + } + + diag(x) <- "0" + } else { + if (orig_log) x <- exp(x) + x <- insight::format_bf(x, name = NULL, exact = TRUE, ...) + + diag(x) <- "1" + } + + df <- as.data.frame(x) + + # Model names + models <- colnames(df) + models[models == "1"] <- "(Intercept only)" + models <- paste0("[", seq_along(models), "] ", models) + + rownames(df) <- colnames(df) <- NULL + df <- cbind(modl = models, df) + colnames(df) <- c( + "Denominator\\Numerator", + paste0(" [", seq_along(models), "] ") + ) + + # caption and footer + caption <- switch( + attr(orig_x, "bf_fun"), + "bayesfactor_restricted()" = "# Bayes Factors for Restricted Models", + "# Bayes Factors for Model Comparison" + ) + footer <- if (log) c("\nBayes Factors are on the log-scale.\n", "red") + + out <- insight::export_table( + df, + caption = c(caption, "blue"), + footer = footer + ) + # Fix spacing + out <- sub("Denominator", " Denominator", out, fixed = TRUE) + + cat(out) + + invisible(orig_x) +} + + +## update ------------------------- + +#' @param subset Vector of model indices to keep or remove. +#' @param reference Index of model to reference to, or `"top"` to +#' reference to the best model, or `"bottom"` to reference to the worst +#' model. +#' +#' @rdname bayesfactor_methods +#' @export +update.bayesfactor_models <- function(object, subset = NULL, reference = NULL, ...) { + if (!is.null(reference)) { + if (reference == "top") { + reference <- which.max(object$log_BF) + } else if (reference == "bottom") { + reference <- which.min(object$log_BF) + } + object$log_BF <- object$log_BF - object$log_BF[reference] + attr(object, "denominator") <- reference + } + + denominator <- attr(object, "denominator") + + if (!is.null(subset)) { + if (all(subset < 0)) { + subset <- seq_len(nrow(object))[subset] + } + object_subset <- object[subset, ] + + if (denominator %in% subset) { + attr(object_subset, "denominator") <- which(denominator == subset) + } else { + object_subset <- rbind(object[denominator, ], object_subset) + attr(object_subset, "denominator") <- 1 + } + object <- object_subset + } + object +} + + +## as.numeric ------------------------------------------------------- + +#' @rdname bayesfactor_methods +#' @export +as.numeric.bayestestRBF <- function(x, log = FALSE, ...) { + out <- x[["log_BF"]] + if (!log) out <- exp(out) + return(out) +} + +#' @export +as.double.bayestestRBF <- as.numeric.bayestestRBF + +#' @export +as.vector.bayestestRBF <- as.numeric.bayestestRBF + +## as.logical ----------------------------------------------------------------- + +#' @param which Should the logical matrix be of the posterior or prior distribution(s)? +#' +#' @rdname bayesfactor_methods +#' @export +as.logical.bayesfactor_restricted <- function(x, which = c("posterior", "prior"), ...) { + which <- match.arg(which) + as.matrix(attr(x, "bool_results")[[which]]) +} \ No newline at end of file diff --git a/R/bayesfactor_inclusion.R b/R/bayesfactor_inclusion.R index 0176b7ed2..6588bbff9 100644 --- a/R/bayesfactor_inclusion.R +++ b/R/bayesfactor_inclusion.R @@ -1,7 +1,8 @@ #' Inclusion Bayes Factors for testing predictors across Bayesian models -#' -#' The `bf_*` function is an alias of the main function. For more info, see -#' [the Bayes factors vignette](https://easystats.github.io/bayestestR/articles/bayes_factors.html). +#' \cr\cr +#' The `bf_*` function is an alias of the main function. +#' \cr\cr +#' \strong{For more info, see [the Bayes factors vignette](https://easystats.github.io/bayestestR/articles/bayes_factors.html).} #' #' @author Mattan S. Ben-Shachar #' @param models An object of class [bayesfactor_models()] or `BFBayesFactor`. @@ -20,21 +21,28 @@ #' models with effect \eqn{X} more likely to have produced the observed data #' than models without effect \eqn{X}? #' -#' \subsection{Match Models}{ +#' ## Match Models #' If `match_models=FALSE` (default), Inclusion BFs are computed by comparing #' all models with a term against all models without that term. If `TRUE`, #' comparison is restricted to models that (1) do not include any interactions #' with the term of interest; (2) for interaction terms, averaging is done only -#' across models that containe the main effect terms from which the interaction +#' across models that contain the main effect terms from which the interaction #' term is comprised. -#' } #' -#' @inheritSection bayesfactor_parameters Interpreting Bayes Factors +#' ## Additional methods +#' The resulting output is supported by the following methods: +#' +#' - `as.numeric()`: Extract the (possibly log-)Bayes factor values. +#' +#' See [bayesfactor_methods]. +#' +#' @inheritSection bayesfactor_methods Interpreting Bayes Factors #' #' @note Random effects in the `lmer` style are converted to interaction terms: #' i.e., `(X|G)` will become the terms `1:G` and `X:G`. #' #' @seealso [weighted_posteriors()] for Bayesian parameter averaging. +#' @family Bayes factors #' #' @examplesIf require("BayesFactor") #' library(bayestestR) @@ -158,7 +166,7 @@ bayesfactor_inclusion.bayesfactor_models <- function(models, colnames(df.effect) <- c("p_prior", "p_posterior", "log_BF") rownames(df.effect) <- effnames - class(df.effect) <- c("bayesfactor_inclusion", class(df.effect)) + class(df.effect) <- c("bayestestRBF", "bayesfactor_inclusion", class(df.effect)) attr(df.effect, "matched") <- match_models attr(df.effect, "priorOdds") <- prior_odds diff --git a/R/bayesfactor_models.R b/R/bayesfactor_models.R index 2ceadcd77..12b3e4c79 100644 --- a/R/bayesfactor_models.R +++ b/R/bayesfactor_models.R @@ -1,7 +1,11 @@ #' Bayes Factors (BF) for model comparison #' #' @description This function computes or extracts Bayes factors from fitted -#' models. The `bf_*` function is an alias of the main function. +#' models. +#' \cr\cr +#' The `bf_*` function is an alias of the main function. +#' \cr\cr +#' \strong{For more info, see [the Bayes factors vignette](https://easystats.github.io/bayestestR/articles/bayes_factors.html).} #' #' @author Mattan S. Ben-Shachar #' @@ -14,11 +18,7 @@ #' @param denominator Either an integer indicating which of the models to use as #' the denominator, or a model to be used as a denominator. Ignored for #' `BFBayesFactor`. -#' @param object,x A [`bayesfactor_models()`] object. -#' @param subset Vector of model indices to keep or remove. -#' @param reference Index of model to reference to, or `"top"` to -#' reference to the best model, or `"bottom"` to reference to the worst -#' model. +#' #' @inheritParams hdi #' #' @note There is also a [`plot()`-method](https://easystats.github.io/see/articles/bayestestR.html) @@ -27,41 +27,31 @@ #' @details #' If the passed models are supported by **insight** the DV of all models will #' be tested for equality (else this is assumed to be true), and the models' -#' terms will be extracted (allowing for follow-up analysis with `bayesfactor_inclusion`). +#' terms will be extracted (allowing for follow-up analysis with [bayesfactor_inclusion]). #' #' - For `brmsfit` or `stanreg` models, Bayes factors are computed using the \CRANpkg{bridgesampling} package. #' - `brmsfit` models must have been fitted with `save_pars = save_pars(all = TRUE)`. #' - `stanreg` models must have been fitted with a defined `diagnostic_file`. -#' - For `BFBayesFactor`, `bayesfactor_models()` is mostly a wraparound `BayesFactor::extractBF()`. +#' - For `BFBayesFactor`, `bayesfactor_models()` is a wraparound `BayesFactor::extractBF()`. #' - For all other model types, Bayes factors are computed using the BIC approximation. #' Note that BICs are extracted from using [insight::get_loglikelihood], see documentation #' there for options for dealing with transformed responses and REML estimation. #' -#' In order to correctly and precisely estimate Bayes factors, a rule of thumb -#' are the 4 P's: **P**roper **P**riors and **P**lentiful -#' **P**osteriors. How many? The number of posterior samples needed for -#' testing is substantially larger than for estimation (the default of 4000 -#' samples may not be enough in many cases). A conservative rule of thumb is to -#' obtain 10 times more samples than would be required for estimation -#' (_Gronau, Singmann, & Wagenmakers, 2017_). If less than 40,000 samples -#' are detected, `bayesfactor_models()` gives a warning. +#' ## Additional methods +#' The resulting output is supported by the following methods: #' -#' See also [the Bayes factors vignette](https://easystats.github.io/bayestestR/articles/bayes_factors.html). +#' - `as.matrix()`: Extract a full matrix of (log-)Bayes factors between all +#' models (using the transitivity of Bayes factors). +#' - `update()`: subset and/or re-reference the Bayes factors to a different model. +#' - `as.numeric()`: Extract the (possibly log-)Bayes factor values. #' -#' @section Transitivity of Bayes factors: -#' For multiple inputs (models or hypotheses), the function will return multiple -#' Bayes factors between each model and _the same_ reference model (the -#' `denominator` or un-restricted model). However, we can take advantage of the -#' transitivity of Bayes factors - where if we have two Bayes factors for Model -#' _A_ and model _B_ against the _same reference model C_, we can obtain a Bayes -#' factor for comparing model _A_ to model _B_ by dividing them: -#' \cr\cr -#' \deqn{BF_{AB} = \frac{BF_{AC}}{BF_{BC}} = \frac{\frac{ML_{A}}{ML_{C}}}{\frac{ML_{B}}{ML_{C}}} = \frac{ML_{A}}{ML_{B}}} -#' \cr\cr -#' A full matrix comparing all models can be obtained with `as.matrix()` (see -#' examples). +#' See examples and [bayesfactor_methods]. +#' +#' @inheritSection bayesfactor_methods Prior and posterior considerations #' -#' @inheritSection bayesfactor_parameters Interpreting Bayes Factors +#' @inheritSection bayesfactor_methods Transitivity of Bayes factors +#' +#' @inheritSection bayesfactor_methods Interpreting Bayes Factors #' #' @return A data frame containing the models' formulas (reconstructed fixed and #' random effects) and their `log(BF)`s (Use `as.numeric()` to extract the @@ -166,6 +156,9 @@ #' Statistical Evidence in Experimental Psychology: An Empirical Comparison Using 855 t Tests. #' Perspectives on Psychological Science, 6(3), 291–298. \doi{10.1177/1745691611406923} #' +#' @seealso [bayesfactor_inclusion()] for testing predictors across Bayesian models. +#' @family Bayes factors +#' #' @export bayesfactor_models <- function(..., denominator = 1, verbose = TRUE) { UseMethod("bayesfactor_models") @@ -224,9 +217,9 @@ bayesfactor_models.default <- function(..., denominator = 1, verbose = TRUE) { # Get BIC if (were_checked && estimator == "REML" && - any(vapply(mods, insight::is_mixed_model, TRUE)) && - !isTRUE(attr(model_objects, "same_fixef")) && - verbose) { + any(vapply(mods, insight::is_mixed_model, TRUE)) && + !isTRUE(attr(model_objects, "same_fixef")) && + verbose) { insight::format_warning(paste( "Information criteria (like BIC) based on REML fits (i.e. `estimator=\"REML\"`)", "are not recommended for comparison between models with different fixed effects.", @@ -257,10 +250,10 @@ bayesfactor_models.default <- function(..., denominator = 1, verbose = TRUE) { ) .bf_models_output(res, - denominator = denominator, - bf_method = "BIC approximation", - unsupported_models = !all(supported_models), - model_names = names(mods) + denominator = denominator, + bf_method = "BIC approximation", + unsupported_models = !all(supported_models), + model_names = names(mods) ) } @@ -290,9 +283,9 @@ bayesfactor_models.default <- function(..., denominator = 1, verbose = TRUE) { } .bf_models_output(res, - denominator = denominator, - bf_method = bf_method, - unsupported_models = unsupported_models + denominator = denominator, + bf_method = bf_method, + unsupported_models = unsupported_models ) } @@ -304,8 +297,8 @@ bayesfactor_models.default <- function(..., denominator = 1, verbose = TRUE) { # Test that all is good: resps <- lapply(mods, insight::get_response) from_same_data_as_den <- sapply(resps[-denominator], - identical, - y = resps[[denominator]] + identical, + y = resps[[denominator]] ) if (!all(from_same_data_as_den)) { @@ -401,62 +394,12 @@ bayesfactor_models.BFBayesFactor <- function(..., verbose = TRUE) { ) .bf_models_output(res, - denominator = 1, - bf_method = "JZS (BayesFactor)", - unsupported_models = !inherits(models@denominator, "BFlinearModel") + denominator = 1, + bf_method = "JZS (BayesFactor)", + unsupported_models = !inherits(models@denominator, "BFlinearModel") ) } - -# Methods ----------------------------------------------------------------- - -#' @rdname bayesfactor_models -#' @export -update.bayesfactor_models <- function(object, subset = NULL, reference = NULL, ...) { - if (!is.null(reference)) { - if (reference == "top") { - reference <- which.max(object$log_BF) - } else if (reference == "bottom") { - reference <- which.min(object$log_BF) - } - object$log_BF <- object$log_BF - object$log_BF[reference] - attr(object, "denominator") <- reference - } - - denominator <- attr(object, "denominator") - - if (!is.null(subset)) { - if (all(subset < 0)) { - subset <- seq_len(nrow(object))[subset] - } - object_subset <- object[subset, ] - - if (denominator %in% subset) { - attr(object_subset, "denominator") <- which(denominator == subset) - } else { - object_subset <- rbind(object[denominator, ], object_subset) - attr(object_subset, "denominator") <- 1 - } - object <- object_subset - } - object -} - - -#' @rdname bayesfactor_models -#' @export -as.matrix.bayesfactor_models <- function(x, ...) { - out <- -outer(x$log_BF, x$log_BF, FUN = "-") - rownames(out) <- colnames(out) <- x$Model - - # out <- exp(out) - - class(out) <- c("bayesfactor_matrix", class(out)) - attr(out, "log_BF") <- TRUE - attr(out, "bf_fun") <- "bayesfactor_models()" - out -} - # Helpers ----------------------------------------------------------------- #' @keywords internal @@ -502,7 +445,7 @@ as.matrix.bayesfactor_models <- function(x, ...) { attr(res, "BF_method") <- bf_method attr(res, "unsupported_models") <- unsupported_models attr(res, "model_names") <- model_names - class(res) <- c("bayesfactor_models", "see_bayesfactor_models", class(res)) + class(res) <- c("bayestestRBF", "bayesfactor_models", "see_bayesfactor_models", class(res)) res } diff --git a/R/bayesfactor_parameters.R b/R/bayesfactor_parameters.R index 9620bf30d..458228fe6 100644 --- a/R/bayesfactor_parameters.R +++ b/R/bayesfactor_parameters.R @@ -3,7 +3,7 @@ #' This method computes Bayes factors against the null (either a point or an #' interval), based on prior and posterior samples of a single parameter. This #' Bayes factor indicates the degree by which the mass of the posterior -#' distribution has shifted further away from or closer to the null value(s) +#' distribution has shifted away from or closer to the null value(s) #' (relative to the prior distribution), thus indicating if the null value has #' become less or more likely given the observed data. #' \cr \cr @@ -15,19 +15,12 @@ #' model in which the tested parameter has been restricted to the point null #' (Wagenmakers et al., 2010; Heck, 2019). #' \cr \cr -#' Note that the `logspline` package is used for estimating densities and -#' probabilities, and must be installed for the function to work. -#' \cr \cr -#' `bayesfactor_pointnull()` and `bayesfactor_rope()` are wrappers -#' around `bayesfactor_parameters` with different defaults for the null to -#' be tested against (a point and a range, respectively). Aliases of the main -#' functions are prefixed with `bf_*`, like `bf_parameters()` or -#' `bf_pointnull()`. +#' `bayesfactor_pointnull()` and `bayesfactor_rope()` are wrappers around +#' `bayesfactor_parameters()` with different defaults for the null to be tested +#' against (a point and a range, respectively; see details). The `bf_*` +#' functions are aliases of the main functions. #' \cr \cr -#' \strong{For more info, in particular on specifying correct priors for factors -#' with more than 2 levels, see -#' [the -#' Bayes factors vignette](https://easystats.github.io/bayestestR/articles/bayes_factors.html).} +#' \strong{For more info, see [the Bayes factors vignette](https://easystats.github.io/bayestestR/articles/bayes_factors.html).} #' #' @param posterior A numerical vector, `stanreg` / `brmsfit` object, #' `emmGrid` or a data frame - representing a posterior distribution(s) @@ -42,8 +35,6 @@ #' arguments to internal [logspline::logspline()].) #' @inheritParams hdi #' -#' @inheritSection hdi Model components -#' #' @return A data frame containing the (log) Bayes factor representing evidence #' *against* the null (Use `as.numeric()` to extract the non-log Bayes #' factors; see examples). @@ -56,8 +47,12 @@ #' @details #' This method is used to compute Bayes factors based on prior and posterior #' distributions. +#' \cr \cr +#' Note that the `logspline` package is used for estimating densities and +#' probabilities, and must be installed for the function to work. +#' #' -#' \subsection{One-sided & Dividing Tests (setting an order restriction)}{ +#' ## One-sided & Dividing Tests (setting an order restriction): #' One sided tests (controlled by `direction`) are conducted by restricting #' the prior and posterior of the non-null values (the "alternative") to one #' side of the null only (\cite{Morey & Wagenmakers, 2014}). For example, if we @@ -72,25 +67,23 @@ #' opposing one-sided hypotheses (\cite{Morey & Wagenmakers, 2014}). For #' example, for a Bayes factor comparing the "null" of `<0` to the alternative #' `>0`, we would set `bayesfactor_parameters(null = c(-Inf, 0))`. -#' } #' -#' @section Setting the correct `prior`: -#' For the computation of Bayes factors, the model priors must be proper priors -#' (at the very least they should be *not flat*, and it is preferable that -#' they be *informative*); As the priors for the alternative get wider, the -#' likelihood of the null value(s) increases, to the extreme that for completely -#' flat priors the null is infinitely more favorable than the alternative (this -#' is called *the Jeffreys-Lindley-Bartlett paradox*). Thus, you should -#' only ever try (or want) to compute a Bayes factor when you have an informed -#' prior. -#' \cr\cr -#' (Note that by default, `brms::brm()` uses flat priors for fixed-effects; -#' See example below.) -#' \cr\cr +#' +#' ## Additional methods +#' The resulting output is supported by the following methods: +#' +#' - `as.numeric()`: Extract the (possibly log-)Bayes factor values. +#' +#' See [bayesfactor_methods]. +#' +#' @inheritSection bayesfactor_methods Prior and posterior considerations +#' +#' @section Obtaining prior samples: +#' #' It is important to provide the correct `prior` for meaningful results, #' to match the `posterior`-type input: #' -#' - **A numeric vector** - `prior` should also be a _numeric vector_, representing the prior-estimate. +#' - **A numeric vector** - `prior` should also be a _numeric vector_, representing the prior-distribution #' - **A data frame** - `prior` should also be a _data frame_, representing the prior-estimates, in matching column order. #' - If `rvar_col` is specified, `prior` should be _the name of an `rvar` column_ that represents the prior-estimates. #' - **Supported Bayesian model (`stanreg`, `brmsfit`, etc.)** @@ -104,12 +97,9 @@ #' will try to "unupdate" the estimates (not supported if the estimates have undergone #' any transformations -- `"log"`, `"response"`, etc. -- or any `regrid`ing). #' -#' @section Interpreting Bayes Factors: -#' A Bayes factor greater than 1 can be interpreted as evidence against the -#' null, at which one convention is that a Bayes factor greater than 3 can be -#' considered as "substantial" evidence against the null (and vice versa, a -#' Bayes factor smaller than 1/3 indicates substantial evidence in favor of the -#' null-model) (\cite{Wetzels et al. 2011}). +#' @inheritSection bayesfactor_methods Interpreting Bayes Factors +#' +#' @inheritSection hdi Model components #' #' @examplesIf require("logspline") #' library(bayestestR) @@ -186,6 +176,8 @@ #' #' @author Mattan S. Ben-Shachar #' +#' @family Bayes factors +#' #' @export bayesfactor_parameters <- function(posterior, prior = NULL, @@ -464,6 +456,7 @@ bayesfactor_parameters.data.frame <- function(posterior, ) class(bf_val) <- unique(c( + "bayestestRBF", "bayesfactor_parameters", "see_bayesfactor_parameters", class(bf_val) diff --git a/R/bayesfactor_restricted.R b/R/bayesfactor_restricted.R index f84aedf3f..81ce20d18 100644 --- a/R/bayesfactor_restricted.R +++ b/R/bayesfactor_restricted.R @@ -2,11 +2,10 @@ #' #' This method computes Bayes factors for comparing a model with an order restrictions on its parameters #' with the fully unrestricted model. *Note that this method should only be used for confirmatory analyses*. -#' \cr \cr +#' \cr\cr #' The `bf_*` function is an alias of the main function. #' \cr \cr -#' \strong{For more info, in particular on specifying correct priors for factors with more than 2 levels, -#' see [the Bayes factors vignette](https://easystats.github.io/bayestestR/articles/bayes_factors.html).} +#' \strong{For more info, see [the Bayes factors vignette](https://easystats.github.io/bayestestR/articles/bayes_factors.html).} #' #' @param posterior A `stanreg` / `brmsfit` object, `emmGrid` or a data frame - representing #' a posterior distribution(s) from (see Details). @@ -21,20 +20,28 @@ #' (Though it is possible to use `bayesfactor_restricted()` to test interval restrictions, #' it is more suitable for testing order restrictions; see examples). #' -#' @inheritSection bayesfactor_parameters Setting the correct `prior` +#' ## Additional methods +#' The resulting output is supported by the following methods: +#' +#' - `as.matrix()`: Extract a full matrix of (log-)Bayes factors between all +#' models (using the transitivity of Bayes factors). +#' - `as.logical()`: Extract boolean vectors indicating which (prior/posterior) +#' samples are included in the hypothesized restriction. +#' - `as.numeric()`: Extract the (possibly log-)Bayes factor values. +#' +#' See examples and [bayesfactor_methods]. +#' +#' @inheritSection bayesfactor_parameters Obtaining prior samples #' -#' @inheritSection bayesfactor_models Transitivity of Bayes factors +#' @inheritSection bayesfactor_methods Transitivity of Bayes factors #' -#' @inheritSection bayesfactor_parameters Interpreting Bayes Factors +#' @inheritSection bayesfactor_methods Interpreting Bayes Factors #' #' @return A data frame containing the (log) Bayes factor representing evidence #' *against* the un-restricted model (Use `as.numeric()` to extract the #' non-log Bayes factors; see examples). (A `bool_results` attribute contains #' the results for each sample, indicating if they are included or not in the #' hypothesized restriction.) -#' \cr\cr -#' For `as.matrix()` a square matrix of (log) Bayes factors, with rows as -#' denominators and columns as numerators. #' #' @examples #' set.seed(444) @@ -124,6 +131,8 @@ #' - Morey, R. D. (Jan, 2015). Multiple Comparisons with BayesFactor, Part 2 – order restrictions. #' Retrieved from https://richarddmorey.org/category/order-restrictions/. #' +#' @family Bayes factors +#' #' @export bayesfactor_restricted <- function(posterior, ...) { UseMethod("bayesfactor_restricted") @@ -141,8 +150,8 @@ bayesfactor_restricted.stanreg <- function(posterior, hypothesis, prior = NULL, component = "conditional", ...) { samps <- .clean_priors_and_posteriors(posterior, prior, - effects = effects, component = component, - verbose = verbose + effects = effects, component = component, + verbose = verbose ) # Get savage-dickey BFs @@ -161,7 +170,7 @@ bayesfactor_restricted.brmsfit <- bayesfactor_restricted.stanreg bayesfactor_restricted.blavaan <- function(posterior, hypothesis, prior = NULL, verbose = TRUE, ...) { samps <- .clean_priors_and_posteriors(posterior, prior, - verbose = verbose + verbose = verbose ) # Get savage-dickey BFs @@ -178,7 +187,7 @@ bayesfactor_restricted.emmGrid <- function(posterior, hypothesis, prior = NULL, verbose = TRUE, ...) { samps <- .clean_priors_and_posteriors(posterior, prior, - verbose = verbose + verbose = verbose ) bayesfactor_restricted.data.frame( @@ -262,6 +271,7 @@ bayesfactor_restricted.data.frame <- function(posterior, hypothesis, prior = NUL attr(res, "bool_results") <- list(posterior = posterior_l, prior = prior_l) class(res) <- unique(c( + "bayestestRBF", "bayesfactor_restricted", class(res) )) @@ -273,40 +283,12 @@ bayesfactor_restricted.data.frame <- function(posterior, hypothesis, prior = NUL #' @export bayesfactor_restricted.draws <- function(posterior, hypothesis, prior = NULL, ...) { bayesfactor_restricted(.posterior_draws_to_df(posterior), - hypothesis = hypothesis, - prior = if (!is.null(prior)) .posterior_draws_to_df(prior), - ... + hypothesis = hypothesis, + prior = if (!is.null(prior)) .posterior_draws_to_df(prior), + ... ) } #' @export bayesfactor_restricted.rvar <- bayesfactor_restricted.draws - -# Methods ----------------------------------------------------------------- - -#' @export -#' @rdname bayesfactor_restricted -#' @param x An object of class `bayesfactor_restricted` -#' @param which Should the logical matrix be of the posterior or prior distribution(s)? -as.logical.bayesfactor_restricted <- function(x, which = c("posterior", "prior"), ...) { - which <- match.arg(which) - as.matrix(attr(x, "bool_results")[[which]]) -} - -#' @rdname bayesfactor_restricted -#' @export -as.matrix.bayesfactor_restricted <- function(x, ...) { - log_BFs <- c(0, x$log_BF) - models <- c("(Un-restricted)", x$Hypothesis) - - out <- -outer(log_BFs, log_BFs, FUN = "-") - rownames(out) <- colnames(out) <- models - - # out <- exp(out) - - class(out) <- c("bayesfactor_matrix", class(out)) - attr(out, "log_BF") <- TRUE - attr(out, "bf_fun") <- "bayesfactor_restricted()" - out -} \ No newline at end of file diff --git a/R/print.bayesfactor_models.R b/R/print.bayesfactor_models.R deleted file mode 100644 index 5de4de1af..000000000 --- a/R/print.bayesfactor_models.R +++ /dev/null @@ -1,56 +0,0 @@ -#' @export -print.bayesfactor_matrix <- function(x, log = FALSE, exact = TRUE, ...) { - orig_x <- x - - # Format values - x <- unclass(x) - if (log) { - sgn <- sign(x) < 0 - x <- insight::format_value(abs(x), digits = 2, ...) - - if (any(sgn)) { - x[sgn] <- paste0("-", x[sgn]) - } - - diag(x) <- "0" - } else { - x <- exp(x) - x <- insight::format_bf(x, name = NULL, exact = exact, ...) - - diag(x) <- "1" - } - - df <- as.data.frame(x) - - # Model names - models <- colnames(df) - models[models == "1"] <- "(Intercept only)" - models <- paste0("[", seq_along(models), "] ", models) - - rownames(df) <- colnames(df) <- NULL - df <- cbind(modl = models, df) - colnames(df) <- c( - "Denominator\\Numerator", - paste0(" [", seq_along(models), "] ") - ) - - # caption and footer - caption <- switch( - attr(orig_x, "bf_fun"), - "bayesfactor_restricted()" = "# Bayes Factors for Restricted Models", - "# Bayes Factors for Model Comparison" - ) - footer <- if (log) c("\nBayes Factors are on the log-scale.\n", "red") - - out <- insight::export_table( - df, - caption = c(caption, "blue"), - footer = footer - ) - # Fix spacing - out <- sub("Denominator", " Denominator", out, fixed = TRUE) - - cat(out) - - invisible(orig_x) -} diff --git a/R/si.R b/R/si.R index b5cdd0e61..d7ad5b40d 100644 --- a/R/si.R +++ b/R/si.R @@ -5,9 +5,8 @@ #' updating factor greater or equal than *k*. From the perspective of the Savage-Dickey Bayes factor, testing #' against a point null hypothesis for any value within the support interval will yield a Bayes factor smaller #' than *1/k*. -#' -#' **For more info, in particular on specifying correct priors for factors with more than 2 levels, -#' see [the Bayes factors vignette](https://easystats.github.io/bayestestR/articles/bayes_factors.html).** +#' \cr\cr +#' \strong{For more info, see [the Bayes factors vignette](https://easystats.github.io/bayestestR/articles/bayes_factors.html).} #' #' @param BF The amount of support required to be included in the support interval. #' @inheritParams bayesfactor_parameters @@ -16,9 +15,6 @@ #' @family ci #' #' @details This method is used to compute support intervals based on prior and posterior distributions. -#' For the computation of support intervals, the model priors must be proper priors (at the very least -#' they should be *not flat*, and it is preferable that they be *informative* - note -#' that by default, `brms::brm()` uses flat priors for fixed-effects; see example below). #' #' @section Choosing a value of `BF`: #' The choice of `BF` (the level of support) depends on what we want our interval @@ -32,7 +28,9 @@ #' E.g., if an SI (BF = 1/3) excludes 0, the Bayes factor against the point-null #' will be larger than 3. #' -#' @inheritSection bayesfactor_parameters Setting the correct `prior` +#' @inheritSection bayesfactor_methods Prior and posterior considerations +#' +#' @inheritSection bayesfactor_parameters Obtaining prior samples #' #' @note There is also a [`plot()`-method](https://easystats.github.io/see/articles/bayestestR.html) implemented in the \href{https://easystats.github.io/see/}{\pkg{see}-package}. #' diff --git a/R/utils_bayesfactor.R b/R/utils_bayesfactor.R index 14c51d6aa..408fcd41f 100644 --- a/R/utils_bayesfactor.R +++ b/R/utils_bayesfactor.R @@ -387,39 +387,6 @@ ) } -# As numeric vector ------------------------------------------------------- - -#' @export -as.numeric.bayesfactor_inclusion <- function(x, log = FALSE, ...) { - out <- x[["log_BF"]] - if (!log) out <- exp(out) - return(out) -} - -#' @export -as.numeric.bayesfactor_models <- as.numeric.bayesfactor_inclusion - -#' @export -as.numeric.bayesfactor_parameters <- as.numeric.bayesfactor_inclusion - -#' @export -as.numeric.bayesfactor_restricted <- as.numeric.bayesfactor_inclusion - -## Double: - -#' @export -as.double.bayesfactor_inclusion <- as.numeric.bayesfactor_inclusion - -#' @export -as.double.bayesfactor_models <- as.numeric.bayesfactor_inclusion - -#' @export -as.double.bayesfactor_parameters <- as.numeric.bayesfactor_inclusion - -#' @export -as.double.bayesfactor_restricted <- as.numeric.bayesfactor_inclusion - - # logspline --------------------------------------------------------------- #' @keywords internal diff --git a/man/bayesfactor_inclusion.Rd b/man/bayesfactor_inclusion.Rd index 44de7d77e..b7ec3f95f 100644 --- a/man/bayesfactor_inclusion.Rd +++ b/man/bayesfactor_inclusion.Rd @@ -3,7 +3,11 @@ \name{bayesfactor_inclusion} \alias{bayesfactor_inclusion} \alias{bf_inclusion} -\title{Inclusion Bayes Factors for testing predictors across Bayesian models} +\title{Inclusion Bayes Factors for testing predictors across Bayesian models +\cr\cr +The \verb{bf_*} function is an alias of the main function. +\cr\cr +\strong{For more info, see \href{https://easystats.github.io/bayestestR/articles/bayes_factors.html}{the Bayes factors vignette}.}} \usage{ bayesfactor_inclusion(models, match_models = FALSE, prior_odds = NULL, ...) @@ -25,8 +29,11 @@ log(BF) for each effect (Use \code{as.numeric()} to extract the non-log Bayes factors; see examples). } \description{ -The \verb{bf_*} function is an alias of the main function. For more info, see -\href{https://easystats.github.io/bayestestR/articles/bayes_factors.html}{the Bayes factors vignette}. +Inclusion Bayes Factors for testing predictors across Bayesian models +\cr\cr +The \verb{bf_*} function is an alias of the main function. +\cr\cr +\strong{For more info, see \href{https://easystats.github.io/bayestestR/articles/bayes_factors.html}{the Bayes factors vignette}.} } \details{ Inclusion Bayes factors answer the question: Are the observed data @@ -34,15 +41,25 @@ more probable under models with a particular effect, than they are under models without that particular effect? In other words, on average - are models with effect \eqn{X} more likely to have produced the observed data than models without effect \eqn{X}? - \subsection{Match Models}{ + If \code{match_models=FALSE} (default), Inclusion BFs are computed by comparing all models with a term against all models without that term. If \code{TRUE}, comparison is restricted to models that (1) do not include any interactions with the term of interest; (2) for interaction terms, averaging is done only -across models that containe the main effect terms from which the interaction +across models that contain the main effect terms from which the interaction term is comprised. } + +\subsection{Additional methods}{ + +The resulting output is supported by the following methods: +\itemize{ +\item \code{as.numeric()}: Extract the (possibly log-)Bayes factor values. +} + +See \link{bayesfactor_methods}. +} } \note{ Random effects in the \code{lmer} style are converted to interaction terms: @@ -54,7 +71,7 @@ A Bayes factor greater than 1 can be interpreted as evidence against the null, at which one convention is that a Bayes factor greater than 3 can be considered as "substantial" evidence against the null (and vice versa, a Bayes factor smaller than 1/3 indicates substantial evidence in favor of the -null-model) (\cite{Wetzels et al. 2011}). +null-model). See also \code{effectsize::interpret_bf()}. } \examples{ @@ -97,7 +114,13 @@ for variable selection and model averaging. Journal of Computational and Graphic } \seealso{ \code{\link[=weighted_posteriors]{weighted_posteriors()}} for Bayesian parameter averaging. + +Other Bayes factors: +\code{\link{bayesfactor_models}()}, +\code{\link{bayesfactor_parameters}()}, +\code{\link{bayesfactor_restricted}()} } \author{ Mattan S. Ben-Shachar } +\concept{Bayes factors} diff --git a/man/bayesfactor_methods.Rd b/man/bayesfactor_methods.Rd new file mode 100644 index 000000000..926836d2c --- /dev/null +++ b/man/bayesfactor_methods.Rd @@ -0,0 +1,94 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/bayesfactor-methods.R +\name{bayesfactor_methods} +\alias{bayesfactor_methods} +\alias{as.matrix.bayestestRBF} +\alias{update.bayesfactor_models} +\alias{as.numeric.bayestestRBF} +\alias{as.logical.bayesfactor_restricted} +\title{Methods for Bayes factors} +\usage{ +\method{as.matrix}{bayestestRBF}(x, log = TRUE, ...) + +\method{update}{bayesfactor_models}(object, subset = NULL, reference = NULL, ...) + +\method{as.numeric}{bayestestRBF}(x, log = FALSE, ...) + +\method{as.logical}{bayesfactor_restricted}(x, which = c("posterior", "prior"), ...) +} +\arguments{ +\item{x, object}{Bayes factor object} + +\item{log}{Return log(BF) (default), or BF values.} + +\item{subset}{Vector of model indices to keep or remove.} + +\item{reference}{Index of model to reference to, or \code{"top"} to +reference to the best model, or \code{"bottom"} to reference to the worst +model.} + +\item{which}{Should the logical matrix be of the posterior or prior distribution(s)?} +} +\value{ +\itemize{ +\item \code{as.numeric()} / \code{as.double()} / \code{as.vector()}: a numeric vector of (log) +Bayes factors. +\item \code{as.logical()}: a logical data frame with a column for each +order-restricted hypothesis. +\item \code{as.matrix()}: a square matrix of (log) Bayes factors, with rows as +denominators and columns as numerators. +\item \code{update()}: an updated \code{bayesfactor_models} object. +} +} +\description{ +Methods for Bayes factors +} +\section{Interpreting Bayes Factors}{ + +A Bayes factor greater than 1 can be interpreted as evidence against the +null, at which one convention is that a Bayes factor greater than 3 can be +considered as "substantial" evidence against the null (and vice versa, a +Bayes factor smaller than 1/3 indicates substantial evidence in favor of the +null-model). See also \code{effectsize::interpret_bf()}. +} + +\section{Transitivity of Bayes factors}{ + +For multiple inputs (models or hypotheses), the function will return multiple +Bayes factors between each model and \emph{the same} reference model (the +\code{denominator} or un-restricted model). However, we can take advantage of the +transitivity of Bayes factors - where if we have two Bayes factors for Model +\emph{A} and model \emph{B} against the \emph{same reference model C}, we can obtain a Bayes +factor for comparing model \emph{A} to model \emph{B} by dividing them: +\cr\cr +\deqn{BF_{AB} = \frac{BF_{AC}}{BF_{BC}} = \frac{\frac{ML_{A}}{ML_{C}}}{\frac{ML_{B}}{ML_{C}}} = \frac{ML_{A}}{ML_{B}}} +\cr\cr +(Where \emph{ML} is the \emph{marginal likelihood}.) +\cr\cr +A full matrix comparing all models can be obtained with \code{as.matrix()}. +} + +\section{Prior and posterior considerations}{ + +In order to correctly and precisely estimate Bayes factors, a rule of thumb +are the 4 P's: \strong{P}roper \strong{P}riors and \strong{P}lentiful +\strong{P}osteriors. +\cr\cr +For the computation of Bayes factors, the model priors must be proper priors +(at the very least they should be \emph{not flat}, and it is preferable that they +be \emph{informative}) (Note that by default, \code{brms::brm()} uses flat priors for +fixed-effects); Wide priors result in smaller marginal likelihoods, and thus +models with wider priors are trivially less likely than models with narrower +priors - where, at the extreme, that a model with completely flat priors is +infinitely less favorable than a point null model (this is called \emph{the +Jeffreys-Lindley-Bartlett paradox}). Thus, you should only ever try (or want) +to compute a Bayes factor when you have an informed prior. +\cr\cr +Additionally, for models using MCMC estimation the number of posterior +samples needed for testing is substantially larger than for estimation (the +default of 4000 samples may not be enough in many cases). A conservative rule +of thumb is to obtain 10 times more samples than would be required for +estimation (\emph{Gronau, Singmann, & Wagenmakers, 2017}). If less than 40,000 +samples are detected, a warning is issued. +} + diff --git a/man/bayesfactor_models.Rd b/man/bayesfactor_models.Rd index 0ea6b807a..5d4a311ef 100644 --- a/man/bayesfactor_models.Rd +++ b/man/bayesfactor_models.Rd @@ -4,8 +4,6 @@ \alias{bayesfactor_models} \alias{bf_models} \alias{bayesfactor_models.default} -\alias{update.bayesfactor_models} -\alias{as.matrix.bayesfactor_models} \title{Bayes Factors (BF) for model comparison} \usage{ bayesfactor_models(..., denominator = 1, verbose = TRUE) @@ -13,10 +11,6 @@ bayesfactor_models(..., denominator = 1, verbose = TRUE) bf_models(..., denominator = 1, verbose = TRUE) \method{bayesfactor_models}{default}(..., denominator = 1, verbose = TRUE) - -\method{update}{bayesfactor_models}(object, subset = NULL, reference = NULL, ...) - -\method{as.matrix}{bayesfactor_models}(x, ...) } \arguments{ \item{...}{Fitted models (see details), all fit on the same data, or a single @@ -33,14 +27,6 @@ the denominator, or a model to be used as a denominator. Ignored for \code{BFBayesFactor}.} \item{verbose}{Toggle off warnings.} - -\item{object, x}{A \code{\link[=bayesfactor_models]{bayesfactor_models()}} object.} - -\item{subset}{Vector of model indices to keep or remove.} - -\item{reference}{Index of model to reference to, or \code{"top"} to -reference to the best model, or \code{"bottom"} to reference to the worst -model.} } \value{ A data frame containing the models' formulas (reconstructed fixed and @@ -52,39 +38,68 @@ denominators and columns as numerators. } \description{ This function computes or extracts Bayes factors from fitted -models. The \verb{bf_*} function is an alias of the main function. +models. +\cr\cr +The \verb{bf_*} function is an alias of the main function. +\cr\cr +\strong{For more info, see \href{https://easystats.github.io/bayestestR/articles/bayes_factors.html}{the Bayes factors vignette}.} } \details{ If the passed models are supported by \strong{insight} the DV of all models will be tested for equality (else this is assumed to be true), and the models' -terms will be extracted (allowing for follow-up analysis with \code{bayesfactor_inclusion}). +terms will be extracted (allowing for follow-up analysis with \link{bayesfactor_inclusion}). \itemize{ \item For \code{brmsfit} or \code{stanreg} models, Bayes factors are computed using the \CRANpkg{bridgesampling} package. \itemize{ \item \code{brmsfit} models must have been fitted with \code{save_pars = save_pars(all = TRUE)}. \item \code{stanreg} models must have been fitted with a defined \code{diagnostic_file}. } -\item For \code{BFBayesFactor}, \code{bayesfactor_models()} is mostly a wraparound \code{BayesFactor::extractBF()}. +\item For \code{BFBayesFactor}, \code{bayesfactor_models()} is a wraparound \code{BayesFactor::extractBF()}. \item For all other model types, Bayes factors are computed using the BIC approximation. Note that BICs are extracted from using \link[insight:get_loglikelihood]{insight::get_loglikelihood}, see documentation there for options for dealing with transformed responses and REML estimation. } +\subsection{Additional methods}{ -In order to correctly and precisely estimate Bayes factors, a rule of thumb -are the 4 P's: \strong{P}roper \strong{P}riors and \strong{P}lentiful -\strong{P}osteriors. How many? The number of posterior samples needed for -testing is substantially larger than for estimation (the default of 4000 -samples may not be enough in many cases). A conservative rule of thumb is to -obtain 10 times more samples than would be required for estimation -(\emph{Gronau, Singmann, & Wagenmakers, 2017}). If less than 40,000 samples -are detected, \code{bayesfactor_models()} gives a warning. +The resulting output is supported by the following methods: +\itemize{ +\item \code{as.matrix()}: Extract a full matrix of (log-)Bayes factors between all +models (using the transitivity of Bayes factors). +\item \code{update()}: subset and/or re-reference the Bayes factors to a different model. +\item \code{as.numeric()}: Extract the (possibly log-)Bayes factor values. +} -See also \href{https://easystats.github.io/bayestestR/articles/bayes_factors.html}{the Bayes factors vignette}. +See examples and \link{bayesfactor_methods}. +} } \note{ There is also a \href{https://easystats.github.io/see/articles/bayestestR.html}{\code{plot()}-method} implemented in the \href{https://easystats.github.io/see/}{\pkg{see}-package}. } +\section{Prior and posterior considerations}{ + +In order to correctly and precisely estimate Bayes factors, a rule of thumb +are the 4 P's: \strong{P}roper \strong{P}riors and \strong{P}lentiful +\strong{P}osteriors. +\cr\cr +For the computation of Bayes factors, the model priors must be proper priors +(at the very least they should be \emph{not flat}, and it is preferable that they +be \emph{informative}) (Note that by default, \code{brms::brm()} uses flat priors for +fixed-effects); Wide priors result in smaller marginal likelihoods, and thus +models with wider priors are trivially less likely than models with narrower +priors - where, at the extreme, that a model with completely flat priors is +infinitely less favorable than a point null model (this is called \emph{the +Jeffreys-Lindley-Bartlett paradox}). Thus, you should only ever try (or want) +to compute a Bayes factor when you have an informed prior. +\cr\cr +Additionally, for models using MCMC estimation the number of posterior +samples needed for testing is substantially larger than for estimation (the +default of 4000 samples may not be enough in many cases). A conservative rule +of thumb is to obtain 10 times more samples than would be required for +estimation (\emph{Gronau, Singmann, & Wagenmakers, 2017}). If less than 40,000 +samples are detected, a warning is issued. +} + \section{Transitivity of Bayes factors}{ For multiple inputs (models or hypotheses), the function will return multiple @@ -96,8 +111,9 @@ factor for comparing model \emph{A} to model \emph{B} by dividing them: \cr\cr \deqn{BF_{AB} = \frac{BF_{AC}}{BF_{BC}} = \frac{\frac{ML_{A}}{ML_{C}}}{\frac{ML_{B}}{ML_{C}}} = \frac{ML_{A}}{ML_{B}}} \cr\cr -A full matrix comparing all models can be obtained with \code{as.matrix()} (see -examples). +(Where \emph{ML} is the \emph{marginal likelihood}.) +\cr\cr +A full matrix comparing all models can be obtained with \code{as.matrix()}. } \section{Interpreting Bayes Factors}{ @@ -106,7 +122,7 @@ A Bayes factor greater than 1 can be interpreted as evidence against the null, at which one convention is that a Bayes factor greater than 3 can be considered as "substantial" evidence against the null (and vice versa, a Bayes factor smaller than 1/3 indicates substantial evidence in favor of the -null-model) (\cite{Wetzels et al. 2011}). +null-model). See also \code{effectsize::interpret_bf()}. } \examples{ @@ -205,6 +221,15 @@ Statistical Evidence in Experimental Psychology: An Empirical Comparison Using 8 Perspectives on Psychological Science, 6(3), 291–298. \doi{10.1177/1745691611406923} } } +\seealso{ +\code{\link[=bayesfactor_inclusion]{bayesfactor_inclusion()}} for testing predictors across Bayesian models. + +Other Bayes factors: +\code{\link{bayesfactor_inclusion}()}, +\code{\link{bayesfactor_parameters}()}, +\code{\link{bayesfactor_restricted}()} +} \author{ Mattan S. Ben-Shachar } +\concept{Bayes factors} diff --git a/man/bayesfactor_parameters.Rd b/man/bayesfactor_parameters.Rd index 5a33aebe9..bd5f05237 100644 --- a/man/bayesfactor_parameters.Rd +++ b/man/bayesfactor_parameters.Rd @@ -169,7 +169,7 @@ factors; see examples). This method computes Bayes factors against the null (either a point or an interval), based on prior and posterior samples of a single parameter. This Bayes factor indicates the degree by which the mass of the posterior -distribution has shifted further away from or closer to the null value(s) +distribution has shifted away from or closer to the null value(s) (relative to the prior distribution), thus indicating if the null value has become less or more likely given the observed data. \cr \cr @@ -181,24 +181,21 @@ a Bayes factor comparing the marginal likelihoods of the model against a model in which the tested parameter has been restricted to the point null (Wagenmakers et al., 2010; Heck, 2019). \cr \cr -Note that the \code{logspline} package is used for estimating densities and -probabilities, and must be installed for the function to work. -\cr \cr -\code{bayesfactor_pointnull()} and \code{bayesfactor_rope()} are wrappers -around \code{bayesfactor_parameters} with different defaults for the null to -be tested against (a point and a range, respectively). Aliases of the main -functions are prefixed with \verb{bf_*}, like \code{bf_parameters()} or -\code{bf_pointnull()}. +\code{bayesfactor_pointnull()} and \code{bayesfactor_rope()} are wrappers around +\code{bayesfactor_parameters()} with different defaults for the null to be tested +against (a point and a range, respectively; see details). The \verb{bf_*} +functions are aliases of the main functions. \cr \cr -\strong{For more info, in particular on specifying correct priors for factors -with more than 2 levels, see -\href{https://easystats.github.io/bayestestR/articles/bayes_factors.html}{the Bayes factors vignette}.} +\strong{For more info, see \href{https://easystats.github.io/bayestestR/articles/bayes_factors.html}{the Bayes factors vignette}.} } \details{ This method is used to compute Bayes factors based on prior and posterior distributions. +\cr \cr +Note that the \code{logspline} package is used for estimating densities and +probabilities, and must be installed for the function to work. +\subsection{One-sided & Dividing Tests (setting an order restriction):}{ -\subsection{One-sided & Dividing Tests (setting an order restriction)}{ One sided tests (controlled by \code{direction}) are conducted by restricting the prior and posterior of the non-null values (the "alternative") to one side of the null only (\cite{Morey & Wagenmakers, 2014}). For example, if we @@ -214,6 +211,16 @@ opposing one-sided hypotheses (\cite{Morey & Wagenmakers, 2014}). For example, for a Bayes factor comparing the "null" of \verb{<0} to the alternative \verb{>0}, we would set \code{bayesfactor_parameters(null = c(-Inf, 0))}. } + +\subsection{Additional methods}{ + +The resulting output is supported by the following methods: +\itemize{ +\item \code{as.numeric()}: Extract the (possibly log-)Bayes factor values. +} + +See \link{bayesfactor_methods}. +} } \note{ There is also a @@ -221,24 +228,13 @@ There is also a implemented in the \href{https://easystats.github.io/see/}{\pkg{see}-package}. } -\section{Setting the correct \code{prior}}{ +\section{Obtaining prior samples}{ + -For the computation of Bayes factors, the model priors must be proper priors -(at the very least they should be \emph{not flat}, and it is preferable that -they be \emph{informative}); As the priors for the alternative get wider, the -likelihood of the null value(s) increases, to the extreme that for completely -flat priors the null is infinitely more favorable than the alternative (this -is called \emph{the Jeffreys-Lindley-Bartlett paradox}). Thus, you should -only ever try (or want) to compute a Bayes factor when you have an informed -prior. -\cr\cr -(Note that by default, \code{brms::brm()} uses flat priors for fixed-effects; -See example below.) -\cr\cr It is important to provide the correct \code{prior} for meaningful results, to match the \code{posterior}-type input: \itemize{ -\item \strong{A numeric vector} - \code{prior} should also be a \emph{numeric vector}, representing the prior-estimate. +\item \strong{A numeric vector} - \code{prior} should also be a \emph{numeric vector}, representing the prior-distribution \item \strong{A data frame} - \code{prior} should also be a \emph{data frame}, representing the prior-estimates, in matching column order. \itemize{ \item If \code{rvar_col} is specified, \code{prior} should be \emph{the name of an \code{rvar} column} that represents the prior-estimates. @@ -260,13 +256,37 @@ any transformations -- \code{"log"}, \code{"response"}, etc. -- or any \code{reg } } +\section{Prior and posterior considerations}{ + +In order to correctly and precisely estimate Bayes factors, a rule of thumb +are the 4 P's: \strong{P}roper \strong{P}riors and \strong{P}lentiful +\strong{P}osteriors. +\cr\cr +For the computation of Bayes factors, the model priors must be proper priors +(at the very least they should be \emph{not flat}, and it is preferable that they +be \emph{informative}) (Note that by default, \code{brms::brm()} uses flat priors for +fixed-effects); Wide priors result in smaller marginal likelihoods, and thus +models with wider priors are trivially less likely than models with narrower +priors - where, at the extreme, that a model with completely flat priors is +infinitely less favorable than a point null model (this is called \emph{the +Jeffreys-Lindley-Bartlett paradox}). Thus, you should only ever try (or want) +to compute a Bayes factor when you have an informed prior. +\cr\cr +Additionally, for models using MCMC estimation the number of posterior +samples needed for testing is substantially larger than for estimation (the +default of 4000 samples may not be enough in many cases). A conservative rule +of thumb is to obtain 10 times more samples than would be required for +estimation (\emph{Gronau, Singmann, & Wagenmakers, 2017}). If less than 40,000 +samples are detected, a warning is issued. +} + \section{Interpreting Bayes Factors}{ A Bayes factor greater than 1 can be interpreted as evidence against the null, at which one convention is that a Bayes factor greater than 3 can be considered as "substantial" evidence against the null (and vice versa, a Bayes factor smaller than 1/3 indicates substantial evidence in favor of the -null-model) (\cite{Wetzels et al. 2011}). +null-model). See also \code{effectsize::interpret_bf()}. } \section{Model components}{ @@ -370,6 +390,13 @@ An Empirical Comparison Using 855 t Tests. Perspectives on Psychological Science, 6(3), 291–298. \doi{10.1177/1745691611406923} } } +\seealso{ +Other Bayes factors: +\code{\link{bayesfactor_inclusion}()}, +\code{\link{bayesfactor_models}()}, +\code{\link{bayesfactor_restricted}()} +} \author{ Mattan S. Ben-Shachar } +\concept{Bayes factors} diff --git a/man/bayesfactor_restricted.Rd b/man/bayesfactor_restricted.Rd index c848cf7c7..3529a82ee 100644 --- a/man/bayesfactor_restricted.Rd +++ b/man/bayesfactor_restricted.Rd @@ -8,8 +8,6 @@ \alias{bayesfactor_restricted.blavaan} \alias{bayesfactor_restricted.emmGrid} \alias{bayesfactor_restricted.data.frame} -\alias{as.logical.bayesfactor_restricted} -\alias{as.matrix.bayesfactor_restricted} \title{Bayes Factors (BF) for Order Restricted Models} \usage{ bayesfactor_restricted(posterior, ...) @@ -59,10 +57,6 @@ bf_restricted(posterior, ...) rvar_col = NULL, ... ) - -\method{as.logical}{bayesfactor_restricted}(x, which = c("posterior", "prior"), ...) - -\method{as.matrix}{bayesfactor_restricted}(x, ...) } \arguments{ \item{posterior}{A \code{stanreg} / \code{brmsfit} object, \code{emmGrid} or a data frame - representing @@ -113,10 +107,6 @@ parameters) are returned. \item{rvar_col}{A single character - the name of an \code{rvar} column in the data frame to be processed. See example in \code{\link[=p_direction]{p_direction()}}.} - -\item{x}{An object of class \code{bayesfactor_restricted}} - -\item{which}{Should the logical matrix be of the posterior or prior distribution(s)?} } \value{ A data frame containing the (log) Bayes factor representing evidence @@ -124,18 +114,14 @@ A data frame containing the (log) Bayes factor representing evidence non-log Bayes factors; see examples). (A \code{bool_results} attribute contains the results for each sample, indicating if they are included or not in the hypothesized restriction.) -\cr\cr -For \code{as.matrix()} a square matrix of (log) Bayes factors, with rows as -denominators and columns as numerators. } \description{ This method computes Bayes factors for comparing a model with an order restrictions on its parameters with the fully unrestricted model. \emph{Note that this method should only be used for confirmatory analyses}. -\cr \cr +\cr\cr The \verb{bf_*} function is an alias of the main function. \cr \cr -\strong{For more info, in particular on specifying correct priors for factors with more than 2 levels, -see \href{https://easystats.github.io/bayestestR/articles/bayes_factors.html}{the Bayes factors vignette}.} +\strong{For more info, see \href{https://easystats.github.io/bayestestR/articles/bayes_factors.html}{the Bayes factors vignette}.} } \details{ This method is used to compute Bayes factors for order-restricted @@ -144,25 +130,27 @@ and posterior distributions (\cite{Morey & Wagenmakers, 2013}). \cr\cr (Though it is possible to use \code{bayesfactor_restricted()} to test interval restrictions, it is more suitable for testing order restrictions; see examples). +\subsection{Additional methods}{ + +The resulting output is supported by the following methods: +\itemize{ +\item \code{as.matrix()}: Extract a full matrix of (log-)Bayes factors between all +models (using the transitivity of Bayes factors). +\item \code{as.logical()}: Extract boolean vectors indicating which (prior/posterior) +samples are included in the hypothesized restriction. +\item \code{as.numeric()}: Extract the (possibly log-)Bayes factor values. } -\section{Setting the correct \code{prior}}{ - -For the computation of Bayes factors, the model priors must be proper priors -(at the very least they should be \emph{not flat}, and it is preferable that -they be \emph{informative}); As the priors for the alternative get wider, the -likelihood of the null value(s) increases, to the extreme that for completely -flat priors the null is infinitely more favorable than the alternative (this -is called \emph{the Jeffreys-Lindley-Bartlett paradox}). Thus, you should -only ever try (or want) to compute a Bayes factor when you have an informed -prior. -\cr\cr -(Note that by default, \code{brms::brm()} uses flat priors for fixed-effects; -See example below.) -\cr\cr + +See examples and \link{bayesfactor_methods}. +} +} +\section{Obtaining prior samples}{ + + It is important to provide the correct \code{prior} for meaningful results, to match the \code{posterior}-type input: \itemize{ -\item \strong{A numeric vector} - \code{prior} should also be a \emph{numeric vector}, representing the prior-estimate. +\item \strong{A numeric vector} - \code{prior} should also be a \emph{numeric vector}, representing the prior-distribution \item \strong{A data frame} - \code{prior} should also be a \emph{data frame}, representing the prior-estimates, in matching column order. \itemize{ \item If \code{rvar_col} is specified, \code{prior} should be \emph{the name of an \code{rvar} column} that represents the prior-estimates. @@ -195,8 +183,9 @@ factor for comparing model \emph{A} to model \emph{B} by dividing them: \cr\cr \deqn{BF_{AB} = \frac{BF_{AC}}{BF_{BC}} = \frac{\frac{ML_{A}}{ML_{C}}}{\frac{ML_{B}}{ML_{C}}} = \frac{ML_{A}}{ML_{B}}} \cr\cr -A full matrix comparing all models can be obtained with \code{as.matrix()} (see -examples). +(Where \emph{ML} is the \emph{marginal likelihood}.) +\cr\cr +A full matrix comparing all models can be obtained with \code{as.matrix()}. } \section{Interpreting Bayes Factors}{ @@ -205,7 +194,7 @@ A Bayes factor greater than 1 can be interpreted as evidence against the null, at which one convention is that a Bayes factor greater than 3 can be considered as "substantial" evidence against the null (and vice versa, a Bayes factor smaller than 1/3 indicates substantial evidence in favor of the -null-model) (\cite{Wetzels et al. 2011}). +null-model). See also \code{effectsize::interpret_bf()}. } \examples{ @@ -299,3 +288,10 @@ Psychological methods, 16(4), 406. Retrieved from https://richarddmorey.org/category/order-restrictions/. } } +\seealso{ +Other Bayes factors: +\code{\link{bayesfactor_inclusion}()}, +\code{\link{bayesfactor_models}()}, +\code{\link{bayesfactor_parameters}()} +} +\concept{Bayes factors} diff --git a/man/si.Rd b/man/si.Rd index 1e3e292e6..455fd883b 100644 --- a/man/si.Rd +++ b/man/si.Rd @@ -109,15 +109,11 @@ than average, by some degree \emph{k}; these are values of the parameter that ar updating factor greater or equal than \emph{k}. From the perspective of the Savage-Dickey Bayes factor, testing against a point null hypothesis for any value within the support interval will yield a Bayes factor smaller than \emph{1/k}. +\cr\cr +\strong{For more info, see \href{https://easystats.github.io/bayestestR/articles/bayes_factors.html}{the Bayes factors vignette}.} } \details{ -\strong{For more info, in particular on specifying correct priors for factors with more than 2 levels, -see \href{https://easystats.github.io/bayestestR/articles/bayes_factors.html}{the Bayes factors vignette}.} - This method is used to compute support intervals based on prior and posterior distributions. -For the computation of support intervals, the model priors must be proper priors (at the very least -they should be \emph{not flat}, and it is preferable that they be \emph{informative} - note -that by default, \code{brms::brm()} uses flat priors for fixed-effects; see example below). } \note{ There is also a \href{https://easystats.github.io/see/articles/bayestestR.html}{\code{plot()}-method} implemented in the \href{https://easystats.github.io/see/}{\pkg{see}-package}. @@ -137,24 +133,37 @@ will be larger than 3. } } -\section{Setting the correct \code{prior}}{ +\section{Prior and posterior considerations}{ -For the computation of Bayes factors, the model priors must be proper priors -(at the very least they should be \emph{not flat}, and it is preferable that -they be \emph{informative}); As the priors for the alternative get wider, the -likelihood of the null value(s) increases, to the extreme that for completely -flat priors the null is infinitely more favorable than the alternative (this -is called \emph{the Jeffreys-Lindley-Bartlett paradox}). Thus, you should -only ever try (or want) to compute a Bayes factor when you have an informed -prior. +In order to correctly and precisely estimate Bayes factors, a rule of thumb +are the 4 P's: \strong{P}roper \strong{P}riors and \strong{P}lentiful +\strong{P}osteriors. \cr\cr -(Note that by default, \code{brms::brm()} uses flat priors for fixed-effects; -See example below.) +For the computation of Bayes factors, the model priors must be proper priors +(at the very least they should be \emph{not flat}, and it is preferable that they +be \emph{informative}) (Note that by default, \code{brms::brm()} uses flat priors for +fixed-effects); Wide priors result in smaller marginal likelihoods, and thus +models with wider priors are trivially less likely than models with narrower +priors - where, at the extreme, that a model with completely flat priors is +infinitely less favorable than a point null model (this is called \emph{the +Jeffreys-Lindley-Bartlett paradox}). Thus, you should only ever try (or want) +to compute a Bayes factor when you have an informed prior. \cr\cr +Additionally, for models using MCMC estimation the number of posterior +samples needed for testing is substantially larger than for estimation (the +default of 4000 samples may not be enough in many cases). A conservative rule +of thumb is to obtain 10 times more samples than would be required for +estimation (\emph{Gronau, Singmann, & Wagenmakers, 2017}). If less than 40,000 +samples are detected, a warning is issued. +} + +\section{Obtaining prior samples}{ + + It is important to provide the correct \code{prior} for meaningful results, to match the \code{posterior}-type input: \itemize{ -\item \strong{A numeric vector} - \code{prior} should also be a \emph{numeric vector}, representing the prior-estimate. +\item \strong{A numeric vector} - \code{prior} should also be a \emph{numeric vector}, representing the prior-distribution \item \strong{A data frame} - \code{prior} should also be a \emph{data frame}, representing the prior-estimates, in matching column order. \itemize{ \item If \code{rvar_col} is specified, \code{prior} should be \emph{the name of an \code{rvar} column} that represents the prior-estimates. From 899b2e779b6bb380d1084798d755174e2806ddc9 Mon Sep 17 00:00:00 2001 From: "Mattan S. Ben-Shachar" Date: Tue, 2 Sep 2025 22:38:51 +0300 Subject: [PATCH 02/36] doc ... --- R/bayesfactor-methods.R | 1 + man/bayesfactor_methods.Rd | 2 ++ 2 files changed, 3 insertions(+) diff --git a/R/bayesfactor-methods.R b/R/bayesfactor-methods.R index 47404cb7e..8cba9d4bf 100644 --- a/R/bayesfactor-methods.R +++ b/R/bayesfactor-methods.R @@ -1,6 +1,7 @@ #' Methods for Bayes factors #' #' @param x,object Bayes factor object +#' @param ... Additional arguments (currently not used). #' #' @return #' - `as.numeric()` / `as.double()` / `as.vector()`: a numeric vector of (log) diff --git a/man/bayesfactor_methods.Rd b/man/bayesfactor_methods.Rd index 926836d2c..e85b40469 100644 --- a/man/bayesfactor_methods.Rd +++ b/man/bayesfactor_methods.Rd @@ -21,6 +21,8 @@ \item{log}{Return log(BF) (default), or BF values.} +\item{...}{Additional arguments (currently not used).} + \item{subset}{Vector of model indices to keep or remove.} \item{reference}{Index of model to reference to, or \code{"top"} to From 6e030637dfcdfc8469e9412e67f6359e5fd00141 Mon Sep 17 00:00:00 2001 From: "Mattan S. Ben-Shachar" Date: Tue, 2 Sep 2025 22:40:01 +0300 Subject: [PATCH 03/36] styler --- R/bayesfactor-methods.R | 6 ++--- R/bayesfactor_models.R | 30 ++++++++++++------------ R/bayesfactor_restricted.R | 15 ++++++------ tests/testthat/test-bayesfactor_models.R | 5 ++-- 4 files changed, 26 insertions(+), 30 deletions(-) diff --git a/R/bayesfactor-methods.R b/R/bayesfactor-methods.R index 8cba9d4bf..ab243de52 100644 --- a/R/bayesfactor-methods.R +++ b/R/bayesfactor-methods.R @@ -70,7 +70,6 @@ as.matrix.bayestestRBF <- function(x, log = TRUE, ...) { log_BFs <- c(0, x$log_BF) models <- c("(Un-restricted)", x$Hypothesis) bf_fun <- "bayesfactor_restricted()" - } else if (inherits(x, "bayesfactor_models")) { log_BFs <- x$log_BF models <- x$Model @@ -131,8 +130,7 @@ print.bayesfactor_matrix <- function(x, log = FALSE, ...) { ) # caption and footer - caption <- switch( - attr(orig_x, "bf_fun"), + caption <- switch(attr(orig_x, "bf_fun"), "bayesfactor_restricted()" = "# Bayes Factors for Restricted Models", "# Bayes Factors for Model Comparison" ) @@ -217,4 +215,4 @@ as.vector.bayestestRBF <- as.numeric.bayestestRBF as.logical.bayesfactor_restricted <- function(x, which = c("posterior", "prior"), ...) { which <- match.arg(which) as.matrix(attr(x, "bool_results")[[which]]) -} \ No newline at end of file +} diff --git a/R/bayesfactor_models.R b/R/bayesfactor_models.R index 12b3e4c79..149abc786 100644 --- a/R/bayesfactor_models.R +++ b/R/bayesfactor_models.R @@ -217,9 +217,9 @@ bayesfactor_models.default <- function(..., denominator = 1, verbose = TRUE) { # Get BIC if (were_checked && estimator == "REML" && - any(vapply(mods, insight::is_mixed_model, TRUE)) && - !isTRUE(attr(model_objects, "same_fixef")) && - verbose) { + any(vapply(mods, insight::is_mixed_model, TRUE)) && + !isTRUE(attr(model_objects, "same_fixef")) && + verbose) { insight::format_warning(paste( "Information criteria (like BIC) based on REML fits (i.e. `estimator=\"REML\"`)", "are not recommended for comparison between models with different fixed effects.", @@ -250,10 +250,10 @@ bayesfactor_models.default <- function(..., denominator = 1, verbose = TRUE) { ) .bf_models_output(res, - denominator = denominator, - bf_method = "BIC approximation", - unsupported_models = !all(supported_models), - model_names = names(mods) + denominator = denominator, + bf_method = "BIC approximation", + unsupported_models = !all(supported_models), + model_names = names(mods) ) } @@ -283,9 +283,9 @@ bayesfactor_models.default <- function(..., denominator = 1, verbose = TRUE) { } .bf_models_output(res, - denominator = denominator, - bf_method = bf_method, - unsupported_models = unsupported_models + denominator = denominator, + bf_method = bf_method, + unsupported_models = unsupported_models ) } @@ -297,8 +297,8 @@ bayesfactor_models.default <- function(..., denominator = 1, verbose = TRUE) { # Test that all is good: resps <- lapply(mods, insight::get_response) from_same_data_as_den <- sapply(resps[-denominator], - identical, - y = resps[[denominator]] + identical, + y = resps[[denominator]] ) if (!all(from_same_data_as_den)) { @@ -394,9 +394,9 @@ bayesfactor_models.BFBayesFactor <- function(..., verbose = TRUE) { ) .bf_models_output(res, - denominator = 1, - bf_method = "JZS (BayesFactor)", - unsupported_models = !inherits(models@denominator, "BFlinearModel") + denominator = 1, + bf_method = "JZS (BayesFactor)", + unsupported_models = !inherits(models@denominator, "BFlinearModel") ) } diff --git a/R/bayesfactor_restricted.R b/R/bayesfactor_restricted.R index 81ce20d18..9e228f1a5 100644 --- a/R/bayesfactor_restricted.R +++ b/R/bayesfactor_restricted.R @@ -150,8 +150,8 @@ bayesfactor_restricted.stanreg <- function(posterior, hypothesis, prior = NULL, component = "conditional", ...) { samps <- .clean_priors_and_posteriors(posterior, prior, - effects = effects, component = component, - verbose = verbose + effects = effects, component = component, + verbose = verbose ) # Get savage-dickey BFs @@ -170,7 +170,7 @@ bayesfactor_restricted.brmsfit <- bayesfactor_restricted.stanreg bayesfactor_restricted.blavaan <- function(posterior, hypothesis, prior = NULL, verbose = TRUE, ...) { samps <- .clean_priors_and_posteriors(posterior, prior, - verbose = verbose + verbose = verbose ) # Get savage-dickey BFs @@ -187,7 +187,7 @@ bayesfactor_restricted.emmGrid <- function(posterior, hypothesis, prior = NULL, verbose = TRUE, ...) { samps <- .clean_priors_and_posteriors(posterior, prior, - verbose = verbose + verbose = verbose ) bayesfactor_restricted.data.frame( @@ -283,12 +283,11 @@ bayesfactor_restricted.data.frame <- function(posterior, hypothesis, prior = NUL #' @export bayesfactor_restricted.draws <- function(posterior, hypothesis, prior = NULL, ...) { bayesfactor_restricted(.posterior_draws_to_df(posterior), - hypothesis = hypothesis, - prior = if (!is.null(prior)) .posterior_draws_to_df(prior), - ... + hypothesis = hypothesis, + prior = if (!is.null(prior)) .posterior_draws_to_df(prior), + ... ) } #' @export bayesfactor_restricted.rvar <- bayesfactor_restricted.draws - diff --git a/tests/testthat/test-bayesfactor_models.R b/tests/testthat/test-bayesfactor_models.R index e2c0c3ef4..6c05b0731 100644 --- a/tests/testthat/test-bayesfactor_models.R +++ b/tests/testthat/test-bayesfactor_models.R @@ -33,8 +33,8 @@ test_that("bayesfactor_models BIC", { # update reference expect_equal(update(BFM2, reference = 1)$log_BF, - c(0, -2.8, -6.2, -57.4), - tolerance = 0.1 + c(0, -2.8, -6.2, -57.4), + tolerance = 0.1 ) }) @@ -231,4 +231,3 @@ test_that("bayesfactor_inclusion | LMM", { expect_equal(bfinc_matched$p_posterior, c(1, 0.875, 0.125, 0.009, 0.002), tolerance = 0.1) expect_equal(bfinc_matched$log_BF, c(NaN, 58.904, -3.045, -3.573, -1.493), tolerance = 0.1) }) - From df62b916c24c9f75a1983a9d2975d309dcd5f201 Mon Sep 17 00:00:00 2001 From: "Mattan S. Ben-Shachar" Date: Tue, 2 Sep 2025 22:46:30 +0300 Subject: [PATCH 04/36] make as.vector generic --- NAMESPACE | 2 ++ R/bayesfactor-methods.R | 22 ++++++++++++++++++++-- 2 files changed, 22 insertions(+), 2 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 0cbe9866a..0042f1c81 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -21,6 +21,7 @@ S3method(as.numeric,p_direction) S3method(as.numeric,p_map) S3method(as.numeric,p_significance) S3method(as.vector,bayestestRBF) +S3method(as.vector,default) S3method(as.vector,p_direction) S3method(bayesfactor_inclusion,BFBayesFactor) S3method(bayesfactor_inclusion,bayesfactor_models) @@ -622,6 +623,7 @@ S3method(weighted_posteriors,brmsfit) S3method(weighted_posteriors,data.frame) S3method(weighted_posteriors,stanreg) export(area_under_curve) +export(as.vector) export(auc) export(bayesfactor) export(bayesfactor_inclusion) diff --git a/R/bayesfactor-methods.R b/R/bayesfactor-methods.R index ab243de52..27c0f608e 100644 --- a/R/bayesfactor-methods.R +++ b/R/bayesfactor-methods.R @@ -131,8 +131,8 @@ print.bayesfactor_matrix <- function(x, log = FALSE, ...) { # caption and footer caption <- switch(attr(orig_x, "bf_fun"), - "bayesfactor_restricted()" = "# Bayes Factors for Restricted Models", - "# Bayes Factors for Model Comparison" + "bayesfactor_restricted()" = "# Bayes Factors for Restricted Models", + "# Bayes Factors for Model Comparison" ) footer <- if (log) c("\nBayes Factors are on the log-scale.\n", "red") @@ -216,3 +216,21 @@ as.logical.bayesfactor_restricted <- function(x, which = c("posterior", "prior") which <- match.arg(which) as.matrix(attr(x, "bool_results")[[which]]) } + + + + +# Utils ----------------------------- +# We need this to avoid argument conflicts with the non-generic as.vector +# For as.vector.bayestestRBF and as.vector.p_direction + +#' @export +as.vector <- function(x, ...) { + UseMethod("as.vector") +} + +#' @export +as.vector.default <- function(x, ...) { + base::as.vector(x) +} + From 3fe649a37f008c79a11b25333d9f3b9a2568845a Mon Sep 17 00:00:00 2001 From: "Mattan S. Ben-Shachar" Date: Tue, 2 Sep 2025 22:48:40 +0300 Subject: [PATCH 05/36] drop as.vector method --- NAMESPACE | 3 --- R/bayesfactor-methods.R | 22 +--------------------- man/bayesfactor_methods.Rd | 2 +- 3 files changed, 2 insertions(+), 25 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 0042f1c81..3b06ddcf7 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -20,8 +20,6 @@ S3method(as.numeric,map_estimate) S3method(as.numeric,p_direction) S3method(as.numeric,p_map) S3method(as.numeric,p_significance) -S3method(as.vector,bayestestRBF) -S3method(as.vector,default) S3method(as.vector,p_direction) S3method(bayesfactor_inclusion,BFBayesFactor) S3method(bayesfactor_inclusion,bayesfactor_models) @@ -623,7 +621,6 @@ S3method(weighted_posteriors,brmsfit) S3method(weighted_posteriors,data.frame) S3method(weighted_posteriors,stanreg) export(area_under_curve) -export(as.vector) export(auc) export(bayesfactor) export(bayesfactor_inclusion) diff --git a/R/bayesfactor-methods.R b/R/bayesfactor-methods.R index 27c0f608e..c5a70b067 100644 --- a/R/bayesfactor-methods.R +++ b/R/bayesfactor-methods.R @@ -4,7 +4,7 @@ #' @param ... Additional arguments (currently not used). #' #' @return -#' - `as.numeric()` / `as.double()` / `as.vector()`: a numeric vector of (log) +#' - `as.numeric()` / `as.double()`: a numeric vector of (log) #' Bayes factors. #' - `as.logical()`: a logical data frame with a column for each #' order-restricted hypothesis. @@ -203,8 +203,6 @@ as.numeric.bayestestRBF <- function(x, log = FALSE, ...) { #' @export as.double.bayestestRBF <- as.numeric.bayestestRBF -#' @export -as.vector.bayestestRBF <- as.numeric.bayestestRBF ## as.logical ----------------------------------------------------------------- @@ -216,21 +214,3 @@ as.logical.bayesfactor_restricted <- function(x, which = c("posterior", "prior") which <- match.arg(which) as.matrix(attr(x, "bool_results")[[which]]) } - - - - -# Utils ----------------------------- -# We need this to avoid argument conflicts with the non-generic as.vector -# For as.vector.bayestestRBF and as.vector.p_direction - -#' @export -as.vector <- function(x, ...) { - UseMethod("as.vector") -} - -#' @export -as.vector.default <- function(x, ...) { - base::as.vector(x) -} - diff --git a/man/bayesfactor_methods.Rd b/man/bayesfactor_methods.Rd index e85b40469..454d9bb90 100644 --- a/man/bayesfactor_methods.Rd +++ b/man/bayesfactor_methods.Rd @@ -33,7 +33,7 @@ model.} } \value{ \itemize{ -\item \code{as.numeric()} / \code{as.double()} / \code{as.vector()}: a numeric vector of (log) +\item \code{as.numeric()} / \code{as.double()}: a numeric vector of (log) Bayes factors. \item \code{as.logical()}: a logical data frame with a column for each order-restricted hypothesis. From 4c0513fa2581f7e155b09279743238bea820cf7e Mon Sep 17 00:00:00 2001 From: "Mattan S. Ben-Shachar" Date: Sat, 27 Sep 2025 21:42:47 +0300 Subject: [PATCH 06/36] bew bfs for stan models --- NAMESPACE | 4 ++++ R/bayesfactor_parameters.R | 6 ++++++ R/bayesfactor_restricted.R | 6 ++++++ R/utils_bayesfactor.R | 29 +++++++++++++++++++++++++++++ R/utils_clean_stan_parameters.R | 18 ++++++++++++++++++ 5 files changed, 63 insertions(+) diff --git a/NAMESPACE b/NAMESPACE index 3b06ddcf7..a66241bd0 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -28,6 +28,7 @@ S3method(bayesfactor_models,blavaan) S3method(bayesfactor_models,brmsfit) S3method(bayesfactor_models,default) S3method(bayesfactor_models,stanreg) +S3method(bayesfactor_parameters,CmdStanFit) S3method(bayesfactor_parameters,bayesfactor_models) S3method(bayesfactor_parameters,blavaan) S3method(bayesfactor_parameters,brmsfit) @@ -42,7 +43,9 @@ S3method(bayesfactor_parameters,rvar) S3method(bayesfactor_parameters,sim) S3method(bayesfactor_parameters,sim.merMod) S3method(bayesfactor_parameters,slopes) +S3method(bayesfactor_parameters,stanfit) S3method(bayesfactor_parameters,stanreg) +S3method(bayesfactor_restricted,CmdStanFit) S3method(bayesfactor_restricted,blavaan) S3method(bayesfactor_restricted,brmsfit) S3method(bayesfactor_restricted,comparisons) @@ -53,6 +56,7 @@ S3method(bayesfactor_restricted,emm_list) S3method(bayesfactor_restricted,predictions) S3method(bayesfactor_restricted,rvar) S3method(bayesfactor_restricted,slopes) +S3method(bayesfactor_restricted,stanfit) S3method(bayesfactor_restricted,stanreg) S3method(bci,BFBayesFactor) S3method(bci,BGGM) diff --git a/R/bayesfactor_parameters.R b/R/bayesfactor_parameters.R index 458228fe6..ae9a1d5a5 100644 --- a/R/bayesfactor_parameters.R +++ b/R/bayesfactor_parameters.R @@ -318,6 +318,12 @@ bayesfactor_parameters.stanreg <- function(posterior, #' @export bayesfactor_parameters.brmsfit <- bayesfactor_parameters.stanreg +#' @export +bayesfactor_parameters.CmdStanFit <- bayesfactor_parameters.stanreg + +#' @export +bayesfactor_parameters.stanfit <- bayesfactor_parameters.stanreg + #' @export bayesfactor_parameters.blavaan <- function(posterior, diff --git a/R/bayesfactor_restricted.R b/R/bayesfactor_restricted.R index 9e228f1a5..f0e7671ae 100644 --- a/R/bayesfactor_restricted.R +++ b/R/bayesfactor_restricted.R @@ -165,6 +165,12 @@ bayesfactor_restricted.stanreg <- function(posterior, hypothesis, prior = NULL, #' @export bayesfactor_restricted.brmsfit <- bayesfactor_restricted.stanreg +#' @export +bayesfactor_restricted.CmdStanFit <- bayesfactor_restricted.stanreg + +#' @export +bayesfactor_restricted.stanfit <- bayesfactor_restricted.stanreg + #' @rdname bayesfactor_restricted #' @export bayesfactor_restricted.blavaan <- function(posterior, hypothesis, prior = NULL, diff --git a/R/utils_bayesfactor.R b/R/utils_bayesfactor.R index 408fcd41f..a04aedf10 100644 --- a/R/utils_bayesfactor.R +++ b/R/utils_bayesfactor.R @@ -201,6 +201,35 @@ .clean_priors_and_posteriors.comparisons <- .clean_priors_and_posteriors.slopes +.clean_priors_and_posteriors.stanfit <- function(posterior, prior, + verbose = TRUE, + ...) { + posterior <- insight::get_parameters(posterior) + + # Get Priors + if (!is.null(prior)) { + prior <- insight::get_parameters(prior) + } + + list( + posterior = posterior, + prior = prior + ) +} + +.clean_priors_and_posteriors.CmdStanFit <- .clean_priors_and_posteriors.stanfit + + +#' @keywords internal +get_parameters.CmdStanFit <- function(x, ...) { + insight::check_if_installed("cmdstanr") + + out <- as.data.frame(x$draws(format = "draws_df")) + out[c(".chain", ".iteration", ".draw")] <- NULL + out[grepl("^lp_", colnames(out))] <- NULL + out +} + # BMA --------------------------------------------------------------------- diff --git a/R/utils_clean_stan_parameters.R b/R/utils_clean_stan_parameters.R index ed0cce232..78d87d558 100644 --- a/R/utils_clean_stan_parameters.R +++ b/R/utils_clean_stan_parameters.R @@ -23,11 +23,29 @@ } +# .get_cleaned_parameters ------------------------------------------------- + #' @keywords internal .get_cleaned_parameters <- function(x, ...) { dots <- list(...) if ("cleaned_parameters" %in% names(dots)) { return(dots$cleaned_parameters) } + + UseMethod(".get_cleaned_parameters") +} + +#' @keywords internal +.get_cleaned_parameters.default <- function(x, ...) { insight::clean_parameters(x) } + +#' @keywords internal +.get_cleaned_parameters.stanfit <- function(x, ...) { + NULL +} + +.get_cleaned_parameters.CmdStanFit <- .get_cleaned_parameters.stanfit + + + From a656488adeb6e0687fb69b2abce62601450c41f3 Mon Sep 17 00:00:00 2001 From: "Mattan S. Ben-Shachar" Date: Sat, 27 Sep 2025 22:02:38 +0300 Subject: [PATCH 07/36] diagnostics --- NAMESPACE | 3 +++ R/diagnostic_posterior.R | 39 +++++++++++++++++++++++++++++++++------ R/effective_sample.R | 4 ++++ R/mcse.R | 5 +++++ 4 files changed, 45 insertions(+), 6 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index a66241bd0..ee2413875 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -149,6 +149,7 @@ S3method(describe_prior,draws) S3method(describe_prior,rvar) S3method(describe_prior,stanreg) S3method(diagnostic_draws,brmsfit) +S3method(diagnostic_posterior,CmdStanFit) S3method(diagnostic_posterior,blavaan) S3method(diagnostic_posterior,brmsfit) S3method(diagnostic_posterior,default) @@ -169,6 +170,7 @@ S3method(display,p_map) S3method(display,p_rope) S3method(display,p_significance) S3method(display,point_estimate) +S3method(effective_sample,CmdStanFit) S3method(effective_sample,MCMCglmm) S3method(effective_sample,blavaan) S3method(effective_sample,brmsfit) @@ -312,6 +314,7 @@ S3method(map_estimate,rvar) S3method(map_estimate,slopes) S3method(map_estimate,stanfit) S3method(map_estimate,stanreg) +S3method(mcse,CmdStanFit) S3method(mcse,blavaan) S3method(mcse,brmsfit) S3method(mcse,stanfit) diff --git a/R/diagnostic_posterior.R b/R/diagnostic_posterior.R index b46459c3a..b31eb2964 100644 --- a/R/diagnostic_posterior.R +++ b/R/diagnostic_posterior.R @@ -209,10 +209,10 @@ diagnostic_posterior.brmsfit <- function(posterior, ...) { # Find parameters params <- insight::find_parameters(posterior, - effects = effects, - component = component, - parameters = parameters, - flatten = TRUE + effects = effects, + component = component, + parameters = parameters, + flatten = TRUE ) # If no diagnostic @@ -286,8 +286,8 @@ diagnostic_posterior.stanfit <- function(posterior, diagnostic = "all", effects insight::check_if_installed("rstan") all_params <- insight::find_parameters(posterior, - effects = effects, - flatten = TRUE + effects = effects, + flatten = TRUE ) diagnostic_df <- data.frame( @@ -316,6 +316,33 @@ diagnostic_posterior.stanfit <- function(posterior, diagnostic = "all", effects } +#' @export +diagnostic_posterior.CmdStanFit <- function(model, + diagnostic = "all", + ...) { + if ("all" %in% diagnostic) { + diagnostic <- c("ESS", "Rhat", "MCSE") + } + + insight::check_if_installed("posterior") + insight::check_if_installed("cmdstanr") + + + draws <- model$draws(format = "draws_df") + + out <- posterior::summarize_draws(draws, + posterior::default_convergence_measures(), + MCSE = posterior::mcse_mean) + out <- datawizard::data_rename(as.data.frame(out), + c(Parameter = "variable", + ESS = "ess_bulk", + ESS_tail = "ess_tail", + Rhat = "rhat")) + + out[!grepl("^lp_", out$Parameter), c("Parameter", diagnostic), drop = FALSE] +} + + #' @export diagnostic_posterior.blavaan <- function(posterior, diagnostic = "all", ...) { # Find parameters diff --git a/R/effective_sample.R b/R/effective_sample.R index e8c51a6b5..12d402ac0 100644 --- a/R/effective_sample.R +++ b/R/effective_sample.R @@ -182,6 +182,10 @@ effective_sample.stanfit <- function(model, ) } +#' @export +effective_sample.CmdStanFit <- function(model, ...) { + diagnostic_posterior(model, diagnostic = c("ESS", "ESS_tail")) +} #' @export effective_sample.blavaan <- function(model, parameters = NULL, ...) { diff --git a/R/mcse.R b/R/mcse.R index 40f65d942..463222452 100644 --- a/R/mcse.R +++ b/R/mcse.R @@ -109,3 +109,8 @@ mcse.blavaan <- mcse.stanreg row.names = NULL ) } + +#' @export +mcse.CmdStanFit <- function(model, ...) { + diagnostic_posterior(model, diagnostic = "MCSE") +} \ No newline at end of file From 149f8b317e671101fea0f04b7402521622ca2aa5 Mon Sep 17 00:00:00 2001 From: "Mattan S. Ben-Shachar" Date: Sat, 27 Sep 2025 22:04:38 +0300 Subject: [PATCH 08/36] CI methods --- NAMESPACE | 4 ++++ R/bci.R | 3 +++ R/ci.R | 3 +++ R/eti.R | 3 +++ R/hdi.R | 3 +++ 5 files changed, 16 insertions(+) diff --git a/NAMESPACE b/NAMESPACE index ee2413875..ee9ea4d90 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -60,6 +60,7 @@ S3method(bayesfactor_restricted,stanfit) S3method(bayesfactor_restricted,stanreg) S3method(bci,BFBayesFactor) S3method(bci,BGGM) +S3method(bci,CmdStanFit) S3method(bci,MCMCglmm) S3method(bci,bamlss) S3method(bci,bayesQR) @@ -88,6 +89,7 @@ S3method(check_prior,brmsfit) S3method(check_prior,stanreg) S3method(ci,BFBayesFactor) S3method(ci,BGGM) +S3method(ci,CmdStanFit) S3method(ci,MCMCglmm) S3method(ci,bamlss) S3method(ci,bcplm) @@ -227,6 +229,7 @@ S3method(estimate_density,stanfit) S3method(estimate_density,stanreg) S3method(eti,BFBayesFactor) S3method(eti,BGGM) +S3method(eti,CmdStanFit) S3method(eti,MCMCglmm) S3method(eti,bamlss) S3method(eti,bayesQR) @@ -269,6 +272,7 @@ S3method(format,point_estimate) S3method(format,rope) S3method(hdi,BFBayesFactor) S3method(hdi,BGGM) +S3method(hdi,CmdStanFit) S3method(hdi,MCMCglmm) S3method(hdi,bamlss) S3method(hdi,bayesQR) diff --git a/R/bci.R b/R/bci.R index a7b875031..b58ce7a17 100644 --- a/R/bci.R +++ b/R/bci.R @@ -227,6 +227,9 @@ bci.stanreg <- function(x, #' @export bci.stanfit <- bci.stanreg +#' @export +bci.CmdStanFit <- bci.stanreg + #' @export bci.blavaan <- bci.stanreg diff --git a/R/ci.R b/R/ci.R index 1a32b204f..965b68c5b 100644 --- a/R/ci.R +++ b/R/ci.R @@ -333,6 +333,9 @@ ci.brmsfit <- function(x, #' @export ci.stanfit <- ci.stanreg +#' @export +ci.CmdStanFit <- ci.stanreg + #' @export ci.blavaan <- ci.stanreg diff --git a/R/eti.R b/R/eti.R index 90b18a1c3..cff0b2dfa 100644 --- a/R/eti.R +++ b/R/eti.R @@ -253,6 +253,9 @@ eti.stanfit <- eti.stanreg #' @export eti.blavaan <- eti.stanreg +#' @export +eti.CmdStanFit <- eti.stanreg + #' @rdname eti #' @export diff --git a/R/hdi.R b/R/hdi.R index 5d7f46695..4ebbd5464 100644 --- a/R/hdi.R +++ b/R/hdi.R @@ -382,6 +382,9 @@ hdi.stanreg <- function(x, #' @export hdi.stanfit <- hdi.stanreg +#' @export +hdi.CmdStanFit <- hdi.stanreg + #' @export hdi.blavaan <- hdi.stanreg From f8fccfab822bfaaf6baf2ea7bd8470440d86ef2f Mon Sep 17 00:00:00 2001 From: "Mattan S. Ben-Shachar" Date: Sat, 27 Sep 2025 22:08:47 +0300 Subject: [PATCH 09/36] density and point est --- NAMESPACE | 3 +++ R/estimate_density.R | 3 +++ R/map_estimate.R | 3 +++ R/point_estimate.R | 3 +++ 4 files changed, 12 insertions(+) diff --git a/NAMESPACE b/NAMESPACE index ee9ea4d90..43ce5dbc9 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -204,6 +204,7 @@ S3method(equivalence_test,slopes) S3method(equivalence_test,stanfit) S3method(equivalence_test,stanreg) S3method(estimate_density,BGGM) +S3method(estimate_density,CmdStanFit) S3method(estimate_density,MCMCglmm) S3method(estimate_density,bamlss) S3method(estimate_density,bayesQR) @@ -298,6 +299,7 @@ S3method(hdi,slopes) S3method(hdi,stanfit) S3method(hdi,stanreg) S3method(map_estimate,BGGM) +S3method(map_estimate,CmdStanFit) S3method(map_estimate,bamlss) S3method(map_estimate,bayesQR) S3method(map_estimate,bcplm) @@ -449,6 +451,7 @@ S3method(plot,point_estimate) S3method(plot,rope) S3method(point_estimate,BFBayesFactor) S3method(point_estimate,BGGM) +S3method(point_estimate,CmdStanFit) S3method(point_estimate,MCMCglmm) S3method(point_estimate,bamlss) S3method(point_estimate,bayesQR) diff --git a/R/estimate_density.R b/R/estimate_density.R index 72d1ae635..93f307048 100644 --- a/R/estimate_density.R +++ b/R/estimate_density.R @@ -435,6 +435,9 @@ estimate_density.stanreg <- function(x, #' @export estimate_density.stanfit <- estimate_density.stanreg +#' @export +estimate_density.CmdStanFit <- estimate_density.stanreg + #' @export estimate_density.blavaan <- estimate_density.stanreg diff --git a/R/map_estimate.R b/R/map_estimate.R index 8580208a6..c9e69d154 100644 --- a/R/map_estimate.R +++ b/R/map_estimate.R @@ -170,6 +170,9 @@ map_estimate.stanreg <- function(x, #' @export map_estimate.stanfit <- map_estimate.stanreg +#' @export +map_estimate.CmdStanFit <- map_estimate.stanreg + #' @export map_estimate.blavaan <- map_estimate.stanreg diff --git a/R/point_estimate.R b/R/point_estimate.R index 6403ab63a..ab17fddd0 100644 --- a/R/point_estimate.R +++ b/R/point_estimate.R @@ -308,6 +308,9 @@ point_estimate.stanreg <- function(x, #' @export point_estimate.stanfit <- point_estimate.stanreg +#' @export +point_estimate.CmdStanFit <- point_estimate.stanreg + #' @export point_estimate.blavaan <- point_estimate.stanreg From 05f1479d08eeb05c302b6e65ce76d776e243e8cf Mon Sep 17 00:00:00 2001 From: "Mattan S. Ben-Shachar" Date: Sat, 27 Sep 2025 22:17:42 +0300 Subject: [PATCH 10/36] spi and si --- NAMESPACE | 2 ++ R/si.R | 13 +++++-------- R/spi.R | 5 ++++- 3 files changed, 11 insertions(+), 9 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 43ce5dbc9..17733e2a3 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -580,6 +580,7 @@ S3method(sexit_thresholds,mlm) S3method(sexit_thresholds,stanreg) S3method(sexit_thresholds,wbm) S3method(sexit_thresholds,zeroinfl) +S3method(si,CmdStanFit) S3method(si,blavaan) S3method(si,brmsfit) S3method(si,comparisons) @@ -600,6 +601,7 @@ S3method(simulate_prior,brmsfit) S3method(simulate_prior,stanreg) S3method(spi,BFBayesFactor) S3method(spi,BGGM) +S3method(spi,CmdStanFit) S3method(spi,MCMCglmm) S3method(spi,bamlss) S3method(spi,bayesQR) diff --git a/R/si.R b/R/si.R index d7ad5b40d..caca74912 100644 --- a/R/si.R +++ b/R/si.R @@ -120,7 +120,7 @@ si.stanreg <- function(posterior, prior = NULL, component = "location", parameters = NULL, ...) { - cleaned_parameters <- insight::clean_parameters(posterior) + cleaned_parameters <- .get_cleaned_parameters(posterior, ...) samps <- .clean_priors_and_posteriors(posterior, prior, effects = effects, component = component, @@ -181,13 +181,10 @@ si.predictions <- si.emmGrid #' @export -si.stanfit <- function(posterior, prior = NULL, BF = 1, verbose = TRUE, effects = "fixed", ...) { - out <- si(insight::get_parameters(posterior, effects = effects), - prior = prior, BF = BF, verbose = verbose - ) - attr(out, "object_name") <- insight::safe_deparse_symbol(substitute(posterior)) - out -} +si.stanfit <- si.stanreg + +#' @export +si.CmdStanFit <- si.stanreg #' @rdname si diff --git a/R/spi.R b/R/spi.R index 26064c29c..c7ce3db8f 100644 --- a/R/spi.R +++ b/R/spi.R @@ -195,7 +195,7 @@ spi.stanreg <- function(x, parameters = NULL, verbose = TRUE, ...) { - cleaned_parameters <- insight::clean_parameters(x) + cleaned_parameters <- .get_cleaned_parameters(x, ...) out <- .prepare_output( spi( @@ -222,6 +222,9 @@ spi.stanreg <- function(x, #' @export spi.stanfit <- spi.stanreg +#' @export +spi.CmdStanFit <- spi.stanreg + #' @export spi.blavaan <- spi.stanreg From b8d8a8f7ba8eb36f92332314784d3534f5efebca Mon Sep 17 00:00:00 2001 From: "Mattan S. Ben-Shachar" Date: Sat, 27 Sep 2025 22:26:13 +0300 Subject: [PATCH 11/36] fix arg name --- R/diagnostic_posterior.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/diagnostic_posterior.R b/R/diagnostic_posterior.R index b31eb2964..61f12c2b8 100644 --- a/R/diagnostic_posterior.R +++ b/R/diagnostic_posterior.R @@ -317,7 +317,7 @@ diagnostic_posterior.stanfit <- function(posterior, diagnostic = "all", effects #' @export -diagnostic_posterior.CmdStanFit <- function(model, +diagnostic_posterior.CmdStanFit <- function(posterior, diagnostic = "all", ...) { if ("all" %in% diagnostic) { @@ -328,7 +328,7 @@ diagnostic_posterior.CmdStanFit <- function(model, insight::check_if_installed("cmdstanr") - draws <- model$draws(format = "draws_df") + draws <- posterior$draws(format = "draws_df") out <- posterior::summarize_draws(draws, posterior::default_convergence_measures(), From 3fa18d59c4e65367e31f9bb6773d8fa611f6fab1 Mon Sep 17 00:00:00 2001 From: "Mattan S. Ben-Shachar" Date: Sun, 28 Sep 2025 09:55:25 +0300 Subject: [PATCH 12/36] roxygen --- DESCRIPTION | 2 +- man/bayesfactor.Rd | 2 +- man/bayesfactor_inclusion.Rd | 2 +- man/bayesfactor_models.Rd | 2 +- man/bayesfactor_parameters.Rd | 6 +++--- man/bayesfactor_restricted.Rd | 6 +++--- man/check_prior.Rd | 2 +- man/ci.Rd | 6 +++--- man/convert_bayesian_as_frequentist.Rd | 2 +- man/describe_posterior.Rd | 2 +- man/diagnostic_posterior.Rd | 2 +- man/display.describe_posterior.Rd | 2 +- man/distribution.Rd | 5 +++-- man/effective_sample.Rd | 2 +- man/equivalence_test.Rd | 2 +- man/estimate_density.Rd | 2 +- man/eti.Rd | 2 +- man/hdi.Rd | 2 +- man/map_estimate.Rd | 2 +- man/mcse.Rd | 2 +- man/mediation.Rd | 2 +- man/p_direction.Rd | 4 ++-- man/p_map.Rd | 2 +- man/p_significance.Rd | 2 +- man/p_to_bf.Rd | 2 +- man/point_estimate.Rd | 2 +- man/rope.Rd | 2 +- man/rope_range.Rd | 2 +- man/sensitivity_to_prior.Rd | 2 +- man/si.Rd | 2 +- man/simulate_correlation.Rd | 2 +- man/simulate_simpson.Rd | 2 +- man/spi.Rd | 2 +- 33 files changed, 42 insertions(+), 41 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 794b11c39..02b9e2ef7 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -122,7 +122,7 @@ VignetteBuilder: knitr Encoding: UTF-8 Language: en-US -RoxygenNote: 7.3.2 +RoxygenNote: 7.3.3 Roxygen: list(markdown = TRUE) Config/testthat/edition: 3 Config/testthat/parallel: true diff --git a/man/bayesfactor.Rd b/man/bayesfactor.Rd index a7f391baa..2d5c42c56 100644 --- a/man/bayesfactor.Rd +++ b/man/bayesfactor.Rd @@ -78,7 +78,7 @@ For a complete overview of these functions, read the \href{https://easystats.git There is also a \href{https://easystats.github.io/see/articles/bayestestR.html}{\code{plot()}-method} implemented in the \href{https://easystats.github.io/see/}{\pkg{see}-package}. } \examples{ -\dontshow{if (require("rstanarm") && require("logspline")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +\dontshow{if (require("rstanarm") && require("logspline")) withAutoprint(\{ # examplesIf} \dontrun{ library(bayestestR) diff --git a/man/bayesfactor_inclusion.Rd b/man/bayesfactor_inclusion.Rd index b7ec3f95f..7647c7bdf 100644 --- a/man/bayesfactor_inclusion.Rd +++ b/man/bayesfactor_inclusion.Rd @@ -75,7 +75,7 @@ null-model). See also \code{effectsize::interpret_bf()}. } \examples{ -\dontshow{if (require("BayesFactor")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +\dontshow{if (require("BayesFactor")) withAutoprint(\{ # examplesIf} library(bayestestR) # Using bayesfactor_models: diff --git a/man/bayesfactor_models.Rd b/man/bayesfactor_models.Rd index 5d4a311ef..298296f4b 100644 --- a/man/bayesfactor_models.Rd +++ b/man/bayesfactor_models.Rd @@ -126,7 +126,7 @@ null-model). See also \code{effectsize::interpret_bf()}. } \examples{ -\dontshow{if (require("lme4") && require("BayesFactor") && require("rstanarm") && require("brms")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +\dontshow{if (require("lme4") && require("BayesFactor") && require("rstanarm") && require("brms")) withAutoprint(\{ # examplesIf} # With lm objects: # ---------------- lm1 <- lm(mpg ~ 1, data = mtcars) diff --git a/man/bayesfactor_parameters.Rd b/man/bayesfactor_parameters.Rd index bd5f05237..3524a3824 100644 --- a/man/bayesfactor_parameters.Rd +++ b/man/bayesfactor_parameters.Rd @@ -319,7 +319,7 @@ here. See also \href{https://easystats.github.io/insight/reference/find_paramete } \examples{ -\dontshow{if (require("logspline")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +\dontshow{if (require("logspline")) withAutoprint(\{ # examplesIf} library(bayestestR) prior <- distribution_normal(1000, mean = 0, sd = 1) posterior <- distribution_normal(1000, mean = .5, sd = .3) @@ -327,7 +327,7 @@ posterior <- distribution_normal(1000, mean = .5, sd = .3) as.numeric(BF_pars) \dontshow{\}) # examplesIf} -\dontshow{if (require("rstanarm") && require("emmeans") && require("logspline")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +\dontshow{if (require("rstanarm") && require("emmeans") && require("logspline")) withAutoprint(\{ # examplesIf} \donttest{ # rstanarm models # --------------- @@ -350,7 +350,7 @@ bayesfactor_parameters(group_diff, prior = stan_model, verbose = FALSE) # bayesfactor_parameters(group_diff, prior = group_diff_prior, verbose = FALSE) } \dontshow{\}) # examplesIf} -\dontshow{if (require("brms") && require("logspline")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +\dontshow{if (require("brms") && require("logspline")) withAutoprint(\{ # examplesIf} # brms models # ----------- \dontrun{ diff --git a/man/bayesfactor_restricted.Rd b/man/bayesfactor_restricted.Rd index 3529a82ee..cd6692c70 100644 --- a/man/bayesfactor_restricted.Rd +++ b/man/bayesfactor_restricted.Rd @@ -227,7 +227,7 @@ as.matrix(b) bool <- as.logical(b, which = "posterior") head(bool) -\dontshow{if (require("see") && require("patchwork")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +\dontshow{if (require("see") && require("patchwork")) withAutoprint(\{ # examplesIf} see::plots( plot(estimate_density(posterior)), @@ -238,7 +238,7 @@ see::plots( guides = "collect" ) \dontshow{\}) # examplesIf} -\dontshow{if (require("rstanarm")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +\dontshow{if (require("rstanarm")) withAutoprint(\{ # examplesIf} \donttest{ # rstanarm models # --------------- @@ -256,7 +256,7 @@ hyps <- c( bayesfactor_restricted(fit_stan, hypothesis = hyps) } \dontshow{\}) # examplesIf} -\dontshow{if (require("rstanarm") && require("emmeans")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +\dontshow{if (require("rstanarm") && require("emmeans")) withAutoprint(\{ # examplesIf} \donttest{ # emmGrid objects # --------------- diff --git a/man/check_prior.Rd b/man/check_prior.Rd index 623727e83..c6d0a9de6 100644 --- a/man/check_prior.Rd +++ b/man/check_prior.Rd @@ -88,7 +88,7 @@ posterior. This idea, and the accompanying heuristics, were discussed in \emph{Gelman et al. 2017}. } \examples{ -\dontshow{if (require("rstanarm") && require("see")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +\dontshow{if (require("rstanarm") && require("see")) withAutoprint(\{ # examplesIf} \donttest{ library(bayestestR) model <- rstanarm::stan_glm(mpg ~ wt + am, data = mtcars, chains = 1, refresh = 0) diff --git a/man/ci.Rd b/man/ci.Rd index 4b5c76395..239b5dda8 100644 --- a/man/ci.Rd +++ b/man/ci.Rd @@ -143,7 +143,7 @@ here. See also \href{https://easystats.github.io/insight/reference/find_paramete } \examples{ -\dontshow{if (require("rstanarm", quietly = TRUE)) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +\dontshow{if (require("rstanarm", quietly = TRUE)) withAutoprint(\{ # examplesIf} library(bayestestR) posterior <- rnorm(1000) @@ -161,12 +161,12 @@ model <- suppressWarnings(rstanarm::stan_glm( ci(model, method = "ETI", ci = c(0.80, 0.89)) ci(model, method = "HDI", ci = c(0.80, 0.89)) \dontshow{\}) # examplesIf} -\dontshow{if (require("BayesFactor", quietly = TRUE)) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +\dontshow{if (require("BayesFactor", quietly = TRUE)) withAutoprint(\{ # examplesIf} bf <- BayesFactor::ttestBF(x = rnorm(100, 1, 1)) ci(bf, method = "ETI") ci(bf, method = "HDI") \dontshow{\}) # examplesIf} -\dontshow{if (require("emmeans", quietly = TRUE) && require("rstanarm", quietly = TRUE)) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +\dontshow{if (require("emmeans", quietly = TRUE) && require("rstanarm", quietly = TRUE)) withAutoprint(\{ # examplesIf} model <- emmeans::emtrends(model, ~1, "wt", data = mtcars) ci(model, method = "ETI") ci(model, method = "HDI") diff --git a/man/convert_bayesian_as_frequentist.Rd b/man/convert_bayesian_as_frequentist.Rd index 91742eea0..549040d08 100644 --- a/man/convert_bayesian_as_frequentist.Rd +++ b/man/convert_bayesian_as_frequentist.Rd @@ -23,7 +23,7 @@ likelihood (\code{FALSE})?} Refit Bayesian model as frequentist. Can be useful for comparisons. } \examples{ -\dontshow{if (require("rstanarm")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +\dontshow{if (require("rstanarm")) withAutoprint(\{ # examplesIf} \donttest{ # Rstanarm ---------------------- # Simple regressions diff --git a/man/describe_posterior.Rd b/man/describe_posterior.Rd index d56039100..c9141cba4 100644 --- a/man/describe_posterior.Rd +++ b/man/describe_posterior.Rd @@ -203,7 +203,7 @@ here. See also \href{https://easystats.github.io/insight/reference/find_paramete } \examples{ -\dontshow{if (all(insight::check_if_installed(c("logspline", "rstanarm", "emmeans", "BayesFactor"), quietly = TRUE))) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +\dontshow{if (all(insight::check_if_installed(c("logspline", "rstanarm", "emmeans", "BayesFactor"), quietly = TRUE))) withAutoprint(\{ # examplesIf} library(bayestestR) x <- rnorm(1000) diff --git a/man/diagnostic_posterior.Rd b/man/diagnostic_posterior.Rd index fcbd21f65..d601bf22e 100644 --- a/man/diagnostic_posterior.Rd +++ b/man/diagnostic_posterior.Rd @@ -123,7 +123,7 @@ here. See also \href{https://easystats.github.io/insight/reference/find_paramete } \examples{ -\dontshow{if (require("rstanarm") && require("brms")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +\dontshow{if (require("rstanarm") && require("brms")) withAutoprint(\{ # examplesIf} \donttest{ # rstanarm models # ----------------------------------------------- diff --git a/man/display.describe_posterior.Rd b/man/display.describe_posterior.Rd index e7cca97df..0e43156cc 100644 --- a/man/display.describe_posterior.Rd +++ b/man/display.describe_posterior.Rd @@ -51,7 +51,7 @@ to PDF or Word files. See for examples. } \examples{ -\dontshow{if (all(insight::check_if_installed(c("tinytable", "gt"), quietly = TRUE))) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +\dontshow{if (all(insight::check_if_installed(c("tinytable", "gt"), quietly = TRUE))) withAutoprint(\{ # examplesIf} \donttest{ d <- data.frame(replicate(4, rnorm(20))) result <- describe_posterior(d) diff --git a/man/distribution.Rd b/man/distribution.Rd index b84658550..97c50b53e 100644 --- a/man/distribution.Rd +++ b/man/distribution.Rd @@ -66,7 +66,8 @@ distribution_uniform(n, min = 0, max = 1, random = FALSE, ...) \item{...}{Arguments passed to or from other methods.} -\item{n}{the number of observations} +\item{n}{number of observations. If \code{length(n) > 1}, the length + is taken to be the number required.} \item{random}{Generate near-perfect or random (simple wrappers for the base R \verb{r*} functions) distributions. When \code{random = FALSE}, these function return @@ -90,7 +91,7 @@ distribution_uniform(n, min = 0, max = 1, random = FALSE, ...) \item{sd}{vector of standard deviations.} -\item{mu}{the mean} +\item{mu}{alternative parametrization via mean: see \sQuote{Details}.} \item{phi}{Corresponding to \code{glmmTMB}'s implementation of nbinom distribution, where \code{size=mu/phi}.} diff --git a/man/effective_sample.Rd b/man/effective_sample.Rd index 3125222f4..2a0c98b21 100644 --- a/man/effective_sample.Rd +++ b/man/effective_sample.Rd @@ -122,7 +122,7 @@ here. See also \href{https://easystats.github.io/insight/reference/find_paramete } \examples{ -\dontshow{if (all(insight::check_if_installed(c("rstanarm", "brms", "posterior"), quietly = TRUE))) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +\dontshow{if (all(insight::check_if_installed(c("rstanarm", "brms", "posterior"), quietly = TRUE))) withAutoprint(\{ # examplesIf} \donttest{ model <- suppressWarnings(rstanarm::stan_glm( mpg ~ wt + gear, diff --git a/man/equivalence_test.Rd b/man/equivalence_test.Rd index 9a2fb9618..7485e33dc 100644 --- a/man/equivalence_test.Rd +++ b/man/equivalence_test.Rd @@ -203,7 +203,7 @@ here. See also \href{https://easystats.github.io/insight/reference/find_paramete } \examples{ -\dontshow{if (all(insight::check_if_installed(c("rstanarm", "brms", "emmeans", "BayesFactor", "see"), quietly = TRUE))) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +\dontshow{if (all(insight::check_if_installed(c("rstanarm", "brms", "emmeans", "BayesFactor", "see"), quietly = TRUE))) withAutoprint(\{ # examplesIf} library(bayestestR) equivalence_test(x = rnorm(1000, 0, 0.01), range = c(-0.1, 0.1)) diff --git a/man/estimate_density.Rd b/man/estimate_density.Rd index 30fa4100b..128a388e6 100644 --- a/man/estimate_density.Rd +++ b/man/estimate_density.Rd @@ -152,7 +152,7 @@ here. See also \href{https://easystats.github.io/insight/reference/find_paramete } \examples{ -\dontshow{if (require("logspline") && require("KernSmooth") && require("mclust") && require("emmeans") && require("rstanarm") && require("brms")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +\dontshow{if (require("logspline") && require("KernSmooth") && require("mclust") && require("emmeans") && require("rstanarm") && require("brms")) withAutoprint(\{ # examplesIf} library(bayestestR) set.seed(1) diff --git a/man/eti.Rd b/man/eti.Rd index 11c7e25fb..3201750ad 100644 --- a/man/eti.Rd +++ b/man/eti.Rd @@ -183,7 +183,7 @@ here. See also \href{https://easystats.github.io/insight/reference/find_paramete } \examples{ -\dontshow{if (require("rstanarm") && require("emmeans") && require("brms") && require("BayesFactor")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +\dontshow{if (require("rstanarm") && require("emmeans") && require("brms") && require("BayesFactor")) withAutoprint(\{ # examplesIf} library(bayestestR) posterior <- rnorm(1000) diff --git a/man/hdi.Rd b/man/hdi.Rd index a020a1e84..8844ee518 100644 --- a/man/hdi.Rd +++ b/man/hdi.Rd @@ -185,7 +185,7 @@ here. See also \href{https://easystats.github.io/insight/reference/find_paramete } \examples{ -\dontshow{if (all(insight::check_if_installed(c("rstanarm", "brms", "emmeans", "BayesFactor"), quietly = TRUE))) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +\dontshow{if (all(insight::check_if_installed(c("rstanarm", "brms", "emmeans", "BayesFactor"), quietly = TRUE))) withAutoprint(\{ # examplesIf} library(bayestestR) posterior <- rnorm(1000) diff --git a/man/map_estimate.Rd b/man/map_estimate.Rd index c9b78350e..c1b47b57c 100644 --- a/man/map_estimate.Rd +++ b/man/map_estimate.Rd @@ -154,7 +154,7 @@ here. See also \href{https://easystats.github.io/insight/reference/find_paramete } \examples{ -\dontshow{if (require("rstanarm") && require("brms")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +\dontshow{if (require("rstanarm") && require("brms")) withAutoprint(\{ # examplesIf} \donttest{ library(bayestestR) diff --git a/man/mcse.Rd b/man/mcse.Rd index 6c927543d..e3e3552d4 100644 --- a/man/mcse.Rd +++ b/man/mcse.Rd @@ -95,7 +95,7 @@ here. See also \href{https://easystats.github.io/insight/reference/find_paramete } \examples{ -\dontshow{if (require("rstanarm")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +\dontshow{if (require("rstanarm")) withAutoprint(\{ # examplesIf} \donttest{ library(bayestestR) diff --git a/man/mediation.Rd b/man/mediation.Rd index f9541be92..2cdcc7edf 100644 --- a/man/mediation.Rd +++ b/man/mediation.Rd @@ -97,7 +97,7 @@ samples of the effects, which can be used for further processing in the \strong{bayestestR} package. } \examples{ -\dontshow{if (require("mediation") && require("brms") && require("rstanarm")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +\dontshow{if (require("mediation") && require("brms") && require("rstanarm")) withAutoprint(\{ # examplesIf} \donttest{ library(mediation) library(brms) diff --git a/man/p_direction.Rd b/man/p_direction.Rd index 3923c635d..ddaf7160e 100644 --- a/man/p_direction.Rd +++ b/man/p_direction.Rd @@ -252,7 +252,7 @@ here. See also \href{https://easystats.github.io/insight/reference/find_paramete } \examples{ -\dontshow{if (requireNamespace("rstanarm", quietly = TRUE) && requireNamespace("emmeans", quietly = TRUE) && requireNamespace("brms", quietly = TRUE) && requireNamespace("BayesFactor", quietly = TRUE)) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +\dontshow{if (requireNamespace("rstanarm", quietly = TRUE) && requireNamespace("emmeans", quietly = TRUE) && requireNamespace("brms", quietly = TRUE) && requireNamespace("BayesFactor", quietly = TRUE)) withAutoprint(\{ # examplesIf} library(bayestestR) # Simulate a posterior distribution of mean 1 and SD 1 @@ -294,7 +294,7 @@ p_direction(bf) p_direction(bf, method = "kernel") } \dontshow{\}) # examplesIf} -\dontshow{if (requireNamespace("posterior", quietly = TRUE)) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +\dontshow{if (requireNamespace("posterior", quietly = TRUE)) withAutoprint(\{ # examplesIf} # Using "rvar_col" x <- data.frame(mu = c(0, 0.5, 1), sigma = c(1, 0.5, 0.25)) x$my_rvar <- posterior::rvar_rng(rnorm, 3, mean = x$mu, sd = x$sigma) diff --git a/man/p_map.Rd b/man/p_map.Rd index c87d96d34..b5f30dc58 100644 --- a/man/p_map.Rd +++ b/man/p_map.Rd @@ -156,7 +156,7 @@ here. See also \href{https://easystats.github.io/insight/reference/find_paramete } \examples{ -\dontshow{if (require("rstanarm") && require("emmeans") && require("brms") && require("BayesFactor")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +\dontshow{if (require("rstanarm") && require("emmeans") && require("brms") && require("BayesFactor")) withAutoprint(\{ # examplesIf} library(bayestestR) p_map(rnorm(1000, 0, 1)) diff --git a/man/p_significance.Rd b/man/p_significance.Rd index 552e86dbd..8e6b25b99 100644 --- a/man/p_significance.Rd +++ b/man/p_significance.Rd @@ -160,7 +160,7 @@ here. See also \href{https://easystats.github.io/insight/reference/find_paramete } \examples{ -\dontshow{if (require("rstanarm")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +\dontshow{if (require("rstanarm")) withAutoprint(\{ # examplesIf} library(bayestestR) # Simulate a posterior distribution of mean 1 and SD 1 diff --git a/man/p_to_bf.Rd b/man/p_to_bf.Rd index 9a9c0af3b..66b81e18e 100644 --- a/man/p_to_bf.Rd +++ b/man/p_to_bf.Rd @@ -33,7 +33,7 @@ It might therefore be not reliable. Use at your own risks. For more accurate approximate Bayes factors, use \code{\link[=bic_to_bf]{bic_to_bf()}} instead. } \examples{ -\dontshow{if (require("parameters")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +\dontshow{if (require("parameters")) withAutoprint(\{ # examplesIf} data(iris) model <- lm(Petal.Length ~ Sepal.Length + Species, data = iris) p_to_bf(model) diff --git a/man/point_estimate.Rd b/man/point_estimate.Rd index 4428fc6f0..fe50f3df1 100644 --- a/man/point_estimate.Rd +++ b/man/point_estimate.Rd @@ -149,7 +149,7 @@ here. See also \href{https://easystats.github.io/insight/reference/find_paramete } \examples{ -\dontshow{if (require("rstanarm") && require("emmeans") && require("brms") && require("BayesFactor")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +\dontshow{if (require("rstanarm") && require("emmeans") && require("brms") && require("BayesFactor")) withAutoprint(\{ # examplesIf} library(bayestestR) point_estimate(rnorm(1000)) diff --git a/man/rope.Rd b/man/rope.Rd index 2d0a35b9e..827618b55 100644 --- a/man/rope.Rd +++ b/man/rope.Rd @@ -237,7 +237,7 @@ here. See also \href{https://easystats.github.io/insight/reference/find_paramete } \examples{ -\dontshow{if (all(insight::check_if_installed(c("rstanarm", "emmeans", "brms", "BayesFactor"), quietly = TRUE))) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +\dontshow{if (all(insight::check_if_installed(c("rstanarm", "emmeans", "brms", "BayesFactor"), quietly = TRUE))) withAutoprint(\{ # examplesIf} library(bayestestR) rope(x = rnorm(1000, 0, 0.01), range = c(-0.1, 0.1)) diff --git a/man/rope_range.Rd b/man/rope_range.Rd index d92efec78..642d7a88a 100644 --- a/man/rope_range.Rd +++ b/man/rope_range.Rd @@ -47,7 +47,7 @@ but it is strongly advised to specify it manually. } } \examples{ -\dontshow{if (require("rstanarm") && require("brms") && require("BayesFactor")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +\dontshow{if (require("rstanarm") && require("brms") && require("BayesFactor")) withAutoprint(\{ # examplesIf} \donttest{ model <- suppressWarnings(rstanarm::stan_glm( mpg ~ wt + gear, diff --git a/man/sensitivity_to_prior.Rd b/man/sensitivity_to_prior.Rd index 4e29813f3..a77bdcaad 100644 --- a/man/sensitivity_to_prior.Rd +++ b/man/sensitivity_to_prior.Rd @@ -30,7 +30,7 @@ antagonistic prior (a prior of same shape located on the opposite of the effect). } \examples{ -\dontshow{if (require("rstanarm")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +\dontshow{if (require("rstanarm")) withAutoprint(\{ # examplesIf} \donttest{ library(bayestestR) diff --git a/man/si.Rd b/man/si.Rd index 455fd883b..1cfe88e7c 100644 --- a/man/si.Rd +++ b/man/si.Rd @@ -186,7 +186,7 @@ any transformations -- \code{"log"}, \code{"response"}, etc. -- or any \code{reg } \examples{ -\dontshow{if (require("logspline") && require("rstanarm") && require("brms") && require("emmeans")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +\dontshow{if (require("logspline") && require("rstanarm") && require("brms") && require("emmeans")) withAutoprint(\{ # examplesIf} library(bayestestR) prior <- distribution_normal(1000, mean = 0, sd = 1) diff --git a/man/simulate_correlation.Rd b/man/simulate_correlation.Rd index 8a4ad9b8b..ed0bc058e 100644 --- a/man/simulate_correlation.Rd +++ b/man/simulate_correlation.Rd @@ -33,7 +33,7 @@ the groups.} Simulate data with specific characteristics. } \examples{ -\dontshow{if (requireNamespace("MASS", quietly = TRUE)) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +\dontshow{if (requireNamespace("MASS", quietly = TRUE)) withAutoprint(\{ # examplesIf} # Correlation -------------------------------- data <- simulate_correlation(r = 0.5) diff --git a/man/simulate_simpson.Rd b/man/simulate_simpson.Rd index 80cc62c2e..65e6a3828 100644 --- a/man/simulate_simpson.Rd +++ b/man/simulate_simpson.Rd @@ -33,7 +33,7 @@ and statistics, in which a trend appears in several different groups of data but disappears or reverses when these groups are combined. } \examples{ -\dontshow{if (requireNamespace("MASS", quietly = TRUE)) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +\dontshow{if (requireNamespace("MASS", quietly = TRUE)) withAutoprint(\{ # examplesIf} data <- simulate_simpson(n = 10, groups = 5, r = 0.5) if (require("ggplot2")) { diff --git a/man/spi.Rd b/man/spi.Rd index 59f773a30..0e961b15a 100644 --- a/man/spi.Rd +++ b/man/spi.Rd @@ -117,7 +117,7 @@ and slightly modified to be more robust for Stan models. Thus, credits go to Ying Liu for the original SPI algorithm and R implementation. } \examples{ -\dontshow{if (require("quadprog") && require("rstanarm")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +\dontshow{if (require("quadprog") && require("rstanarm")) withAutoprint(\{ # examplesIf} library(bayestestR) posterior <- rnorm(1000) From 6d00d58d1bf57f6cb21c8cfba85e6d57831c3cae Mon Sep 17 00:00:00 2001 From: "Mattan S. Ben-Shachar" Date: Sun, 28 Sep 2025 10:12:08 +0300 Subject: [PATCH 13/36] pd etc. --- NAMESPACE | 5 +++++ R/p_direction.R | 3 +++ R/p_map.R | 3 +++ R/p_rope.R | 3 +++ R/p_significance.R | 3 +++ R/rope.R | 3 +++ R/utils_check_collinearity.R | 2 ++ 7 files changed, 22 insertions(+) diff --git a/NAMESPACE b/NAMESPACE index 17733e2a3..d572c1c7e 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -330,6 +330,7 @@ S3method(mediation,stanmvreg) S3method(model_to_priors,brmsfit) S3method(p_direction,BFBayesFactor) S3method(p_direction,BGGM) +S3method(p_direction,CmdStanFit) S3method(p_direction,MCMCglmm) S3method(p_direction,bamlss) S3method(p_direction,bayesQR) @@ -357,6 +358,7 @@ S3method(p_direction,stanfit) S3method(p_direction,stanreg) S3method(p_map,BFBayesFactor) S3method(p_map,BGGM) +S3method(p_map,CmdStanFit) S3method(p_map,MCMCglmm) S3method(p_map,bamlss) S3method(p_map,bayesQR) @@ -382,6 +384,7 @@ S3method(p_map,stanfit) S3method(p_map,stanreg) S3method(p_rope,BFBayesFactor) S3method(p_rope,BGGM) +S3method(p_rope,CmdStanFit) S3method(p_rope,MCMCglmm) S3method(p_rope,bamlss) S3method(p_rope,bcplm) @@ -406,6 +409,7 @@ S3method(p_rope,stanfit) S3method(p_rope,stanreg) S3method(p_significance,BFBayesFactor) S3method(p_significance,BGGM) +S3method(p_significance,CmdStanFit) S3method(p_significance,MCMCglmm) S3method(p_significance,bamlss) S3method(p_significance,bayesQR) @@ -528,6 +532,7 @@ S3method(print_md,p_significance) S3method(print_md,point_estimate) S3method(rope,BFBayesFactor) S3method(rope,BGGM) +S3method(rope,CmdStanFit) S3method(rope,MCMCglmm) S3method(rope,bamlss) S3method(rope,bayesQR) diff --git a/R/p_direction.R b/R/p_direction.R index 0d84b040c..61675c4e5 100644 --- a/R/p_direction.R +++ b/R/p_direction.R @@ -554,6 +554,9 @@ p_direction.stanreg <- function(x, #' @export p_direction.stanfit <- p_direction.stanreg +#' @export +p_direction.CmdStanFit <- p_direction.stanreg + #' @export p_direction.blavaan <- p_direction.stanreg diff --git a/R/p_map.R b/R/p_map.R index 963c204f7..81ccfe627 100644 --- a/R/p_map.R +++ b/R/p_map.R @@ -359,6 +359,9 @@ p_map.stanreg <- function(x, #' @export p_map.stanfit <- p_map.stanreg +#' @export +p_map.CmdStanFit <- p_map.stanreg + #' @export p_map.blavaan <- p_map.stanreg diff --git a/R/p_rope.R b/R/p_rope.R index 448915849..f78e33924 100644 --- a/R/p_rope.R +++ b/R/p_rope.R @@ -133,6 +133,9 @@ p_rope.stanreg <- function(x, #' @export p_rope.stanfit <- p_rope.stanreg +#' @export +p_rope.CmdStanFit <- p_rope.stanreg + #' @export p_rope.blavaan <- p_rope.stanreg diff --git a/R/p_significance.R b/R/p_significance.R index 2178695af..d9815f6f2 100644 --- a/R/p_significance.R +++ b/R/p_significance.R @@ -323,6 +323,9 @@ p_significance.stanreg <- function(x, #' @export p_significance.stanfit <- p_significance.stanreg +#' @export +p_significance.CmdStanFit <- p_significance.stanreg + #' @export p_significance.blavaan <- p_significance.stanreg diff --git a/R/rope.R b/R/rope.R index d24f41c63..1cde51541 100644 --- a/R/rope.R +++ b/R/rope.R @@ -554,6 +554,9 @@ rope.stanreg <- function(x, #' @export rope.stanfit <- rope.stanreg +#' @export +rope.CmdStanFit <- rope.stanreg + #' @export rope.blavaan <- rope.stanreg diff --git a/R/utils_check_collinearity.R b/R/utils_check_collinearity.R index e2ed57a01..0ed087c42 100644 --- a/R/utils_check_collinearity.R +++ b/R/utils_check_collinearity.R @@ -2,6 +2,8 @@ .check_multicollinearity <- function(model, method = "equivalence_test", threshold = 0.7, ...) { + if (inherits(model, "CmdStanFit")) return() + valid_parameters <- insight::find_parameters( model, parameters = "^(?!(r_|sd_|prior_|cor_|lp__|b\\[))", From e03b2d57978ca7bb65261d288c13818c2765fb6e Mon Sep 17 00:00:00 2001 From: "Mattan S. Ben-Shachar" Date: Sun, 28 Sep 2025 10:16:52 +0300 Subject: [PATCH 14/36] describe_posterior --- NAMESPACE | 1 + R/describe_posterior.R | 7 +++++-- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index d572c1c7e..5c94c5196 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -114,6 +114,7 @@ S3method(ci,stanfit) S3method(ci,stanreg) S3method(describe_posterior,BFBayesFactor) S3method(describe_posterior,BGGM) +S3method(describe_posterior,CmdStanFit) S3method(describe_posterior,MCMCglmm) S3method(describe_posterior,bamlss) S3method(describe_posterior,bayesQR) diff --git a/R/describe_posterior.R b/R/describe_posterior.R index 56bf9de67..69c13a56d 100644 --- a/R/describe_posterior.R +++ b/R/describe_posterior.R @@ -148,7 +148,7 @@ describe_posterior.default <- function(posterior, ...) { if (!is.data.frame(x) && !is.numeric(x)) { is_stanmvreg <- inherits(x, "stanmvreg") - cleaned_parameters <- insight::clean_parameters(x) + cleaned_parameters <- .get_cleaned_parameters(x) # rename to use `x` in bayes factor later x_df <- insight::get_parameters(x, ...) } else { @@ -289,7 +289,7 @@ describe_posterior.default <- function(posterior, ...) { } ## TODO no BF for arm::sim - if (inherits(x, c("sim", "sim.merMod", "mcmc", "stanfit"))) { + if (inherits(x, c("sim", "sim.merMod", "mcmc"))) { test <- setdiff(test, "bf") } @@ -1253,6 +1253,9 @@ describe_posterior.brmsfit <- function(posterior, #' @export describe_posterior.blavaan <- describe_posterior.stanfit +#' @export +describe_posterior.CmdStanFit <- describe_posterior.stanfit + # other models -------------------------------- From 3bdbf1163daac60cc2d2573d9633470cc8d521d5 Mon Sep 17 00:00:00 2001 From: "Mattan S. Ben-Shachar" Date: Sun, 28 Sep 2025 10:20:05 +0300 Subject: [PATCH 15/36] equivalence_test --- NAMESPACE | 1 + R/equivalence_test.R | 3 +++ R/print.equivalence_test.R | 4 ++-- 3 files changed, 6 insertions(+), 2 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 5c94c5196..b18691f37 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -182,6 +182,7 @@ S3method(effective_sample,stanfit) S3method(effective_sample,stanmvreg) S3method(effective_sample,stanreg) S3method(equivalence_test,BFBayesFactor) +S3method(equivalence_test,CmdStanFit) S3method(equivalence_test,bamlss) S3method(equivalence_test,bayesQR) S3method(equivalence_test,bcplm) diff --git a/R/equivalence_test.R b/R/equivalence_test.R index d165ccac5..167c9ebe7 100644 --- a/R/equivalence_test.R +++ b/R/equivalence_test.R @@ -318,6 +318,9 @@ equivalence_test.stanreg <- function(x, #' @export equivalence_test.stanfit <- equivalence_test.stanreg +#' @export +equivalence_test.CmdStanFit <- equivalence_test.stanreg + #' @export equivalence_test.blavaan <- equivalence_test.stanreg diff --git a/R/print.equivalence_test.R b/R/print.equivalence_test.R index 12022f80d..8440adb21 100644 --- a/R/print.equivalence_test.R +++ b/R/print.equivalence_test.R @@ -10,8 +10,8 @@ print.equivalence_test <- function(x, digits = 2, ...) { # fix "sd" pattern model <- .retrieve_model(x) if (!is.null(model) && !is.data.frame(model)) { - cp <- insight::clean_parameters(model) - if (!is.null(cp$Group) && any(startsWith(cp$Group, "SD/Cor"))) { + cp <- .get_cleaned_parameters(model) + if (!is.null(cp) && !is.null(cp$Group) && any(startsWith(cp$Group, "SD/Cor"))) { cp <- cp[startsWith(cp$Group, "SD/Cor"), ] matches <- match(cp$Parameter, x$Parameter) if (length(matches)) { From 46036761b94c7d3b8485897483e93fb7cc8fafcb Mon Sep 17 00:00:00 2001 From: "Mattan S. Ben-Shachar" Date: Sun, 28 Sep 2025 10:49:29 +0300 Subject: [PATCH 16/36] bf_models --- NAMESPACE | 1 + NEWS.md | 2 ++ R/bayesfactor_models.R | 50 +++++++++++++++++++++++++----------------- 3 files changed, 33 insertions(+), 20 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index b18691f37..fb2aabfde 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -27,6 +27,7 @@ S3method(bayesfactor_models,BFBayesFactor) S3method(bayesfactor_models,blavaan) S3method(bayesfactor_models,brmsfit) S3method(bayesfactor_models,default) +S3method(bayesfactor_models,stanfit) S3method(bayesfactor_models,stanreg) S3method(bayesfactor_parameters,CmdStanFit) S3method(bayesfactor_parameters,bayesfactor_models) diff --git a/NEWS.md b/NEWS.md index 3788b0879..0c5601ea1 100644 --- a/NEWS.md +++ b/NEWS.md @@ -5,6 +5,8 @@ * `as.matrix()` for `bayesfactor_restricted()`, to obtain a matrix of Bayes factors between all restricted models. * New dedicated docs for Bayes factor methods `?bayesfactor_methods` +* Added support for `CmdStanFit` models from `{cmdstanr}` and expanded support for `stanfit` models from `rstan`. + ## Changes diff --git a/R/bayesfactor_models.R b/R/bayesfactor_models.R index 149abc786..6e0e5ba10 100644 --- a/R/bayesfactor_models.R +++ b/R/bayesfactor_models.R @@ -217,9 +217,9 @@ bayesfactor_models.default <- function(..., denominator = 1, verbose = TRUE) { # Get BIC if (were_checked && estimator == "REML" && - any(vapply(mods, insight::is_mixed_model, TRUE)) && - !isTRUE(attr(model_objects, "same_fixef")) && - verbose) { + any(vapply(mods, insight::is_mixed_model, TRUE)) && + !isTRUE(attr(model_objects, "same_fixef")) && + verbose) { insight::format_warning(paste( "Information criteria (like BIC) based on REML fits (i.e. `estimator=\"REML\"`)", "are not recommended for comparison between models with different fixed effects.", @@ -250,10 +250,10 @@ bayesfactor_models.default <- function(..., denominator = 1, verbose = TRUE) { ) .bf_models_output(res, - denominator = denominator, - bf_method = "BIC approximation", - unsupported_models = !all(supported_models), - model_names = names(mods) + denominator = denominator, + bf_method = "BIC approximation", + unsupported_models = !all(supported_models), + model_names = names(mods) ) } @@ -279,13 +279,13 @@ bayesfactor_models.default <- function(..., denominator = 1, verbose = TRUE) { } else { res <- .bayesfactor_models_stan_REG(mods, denominator, verbose) bf_method <- "marginal likelihoods (bridgesampling)" - unsupported_models <- FALSE + unsupported_models <- if (inherits(mods[[1]], c("stanfit", "CmdStanFit"))) TRUE else FALSE } .bf_models_output(res, - denominator = denominator, - bf_method = bf_method, - unsupported_models = unsupported_models + denominator = denominator, + bf_method = bf_method, + unsupported_models = unsupported_models ) } @@ -296,13 +296,18 @@ bayesfactor_models.default <- function(..., denominator = 1, verbose = TRUE) { # Test that all is good: resps <- lapply(mods, insight::get_response) - from_same_data_as_den <- sapply(resps[-denominator], - identical, - y = resps[[denominator]] - ) + if (all(!sapply(resps, is.null))) { + from_same_data_as_den <- sapply( + resps[-denominator], + identical, + y = resps[[denominator]] + ) - if (!all(from_same_data_as_den)) { - insight::format_error("Models were not computed from the same data.") + if (!all(from_same_data_as_den)) { + insight::format_error("Models were not computed from the same data.") + } + } else if (verbose) { + insight::format_alert("Unable to validate that all models were fit with the same data.") } mML <- lapply(mods, .get_marglik, verbose = verbose) @@ -314,6 +319,7 @@ bayesfactor_models.default <- function(..., denominator = 1, verbose = TRUE) { # Get formula mforms <- sapply(mods, .find_full_formula) + mforms[!nzchar(mforms)] <- names(mforms)[!nzchar(mforms)] res <- data.frame( Model = mforms, @@ -372,6 +378,10 @@ bayesfactor_models.brmsfit <- bayesfactor_models.stanreg bayesfactor_models.blavaan <- bayesfactor_models.stanreg +#' @export +bayesfactor_models.stanfit <- bayesfactor_models.stanreg + + #' @export bayesfactor_models.BFBayesFactor <- function(..., verbose = TRUE) { models <- c(...) @@ -394,9 +404,9 @@ bayesfactor_models.BFBayesFactor <- function(..., verbose = TRUE) { ) .bf_models_output(res, - denominator = 1, - bf_method = "JZS (BayesFactor)", - unsupported_models = !inherits(models@denominator, "BFlinearModel") + denominator = 1, + bf_method = "JZS (BayesFactor)", + unsupported_models = !inherits(models@denominator, "BFlinearModel") ) } From 585bff3f8066c458a26e37c8bddf810b12c3cd50 Mon Sep 17 00:00:00 2001 From: "Mattan S. Ben-Shachar" Date: Sun, 28 Sep 2025 10:51:57 +0300 Subject: [PATCH 17/36] styler --- R/bayesfactor-methods.R | 4 ++-- R/bayesfactor_models.R | 26 ++++++++++++------------ R/diagnostic_posterior.R | 35 +++++++++++++++++++-------------- R/mcse.R | 2 +- R/utils_check_collinearity.R | 4 +++- R/utils_clean_stan_parameters.R | 3 --- 6 files changed, 39 insertions(+), 35 deletions(-) diff --git a/R/bayesfactor-methods.R b/R/bayesfactor-methods.R index c5a70b067..52b930132 100644 --- a/R/bayesfactor-methods.R +++ b/R/bayesfactor-methods.R @@ -131,8 +131,8 @@ print.bayesfactor_matrix <- function(x, log = FALSE, ...) { # caption and footer caption <- switch(attr(orig_x, "bf_fun"), - "bayesfactor_restricted()" = "# Bayes Factors for Restricted Models", - "# Bayes Factors for Model Comparison" + "bayesfactor_restricted()" = "# Bayes Factors for Restricted Models", + "# Bayes Factors for Model Comparison" ) footer <- if (log) c("\nBayes Factors are on the log-scale.\n", "red") diff --git a/R/bayesfactor_models.R b/R/bayesfactor_models.R index 6e0e5ba10..0ce6332c4 100644 --- a/R/bayesfactor_models.R +++ b/R/bayesfactor_models.R @@ -217,9 +217,9 @@ bayesfactor_models.default <- function(..., denominator = 1, verbose = TRUE) { # Get BIC if (were_checked && estimator == "REML" && - any(vapply(mods, insight::is_mixed_model, TRUE)) && - !isTRUE(attr(model_objects, "same_fixef")) && - verbose) { + any(vapply(mods, insight::is_mixed_model, TRUE)) && + !isTRUE(attr(model_objects, "same_fixef")) && + verbose) { insight::format_warning(paste( "Information criteria (like BIC) based on REML fits (i.e. `estimator=\"REML\"`)", "are not recommended for comparison between models with different fixed effects.", @@ -250,10 +250,10 @@ bayesfactor_models.default <- function(..., denominator = 1, verbose = TRUE) { ) .bf_models_output(res, - denominator = denominator, - bf_method = "BIC approximation", - unsupported_models = !all(supported_models), - model_names = names(mods) + denominator = denominator, + bf_method = "BIC approximation", + unsupported_models = !all(supported_models), + model_names = names(mods) ) } @@ -283,9 +283,9 @@ bayesfactor_models.default <- function(..., denominator = 1, verbose = TRUE) { } .bf_models_output(res, - denominator = denominator, - bf_method = bf_method, - unsupported_models = unsupported_models + denominator = denominator, + bf_method = bf_method, + unsupported_models = unsupported_models ) } @@ -404,9 +404,9 @@ bayesfactor_models.BFBayesFactor <- function(..., verbose = TRUE) { ) .bf_models_output(res, - denominator = 1, - bf_method = "JZS (BayesFactor)", - unsupported_models = !inherits(models@denominator, "BFlinearModel") + denominator = 1, + bf_method = "JZS (BayesFactor)", + unsupported_models = !inherits(models@denominator, "BFlinearModel") ) } diff --git a/R/diagnostic_posterior.R b/R/diagnostic_posterior.R index 61f12c2b8..1c12f1d2c 100644 --- a/R/diagnostic_posterior.R +++ b/R/diagnostic_posterior.R @@ -209,10 +209,10 @@ diagnostic_posterior.brmsfit <- function(posterior, ...) { # Find parameters params <- insight::find_parameters(posterior, - effects = effects, - component = component, - parameters = parameters, - flatten = TRUE + effects = effects, + component = component, + parameters = parameters, + flatten = TRUE ) # If no diagnostic @@ -286,8 +286,8 @@ diagnostic_posterior.stanfit <- function(posterior, diagnostic = "all", effects insight::check_if_installed("rstan") all_params <- insight::find_parameters(posterior, - effects = effects, - flatten = TRUE + effects = effects, + flatten = TRUE ) diagnostic_df <- data.frame( @@ -331,15 +331,20 @@ diagnostic_posterior.CmdStanFit <- function(posterior, draws <- posterior$draws(format = "draws_df") out <- posterior::summarize_draws(draws, - posterior::default_convergence_measures(), - MCSE = posterior::mcse_mean) - out <- datawizard::data_rename(as.data.frame(out), - c(Parameter = "variable", - ESS = "ess_bulk", - ESS_tail = "ess_tail", - Rhat = "rhat")) - - out[!grepl("^lp_", out$Parameter), c("Parameter", diagnostic), drop = FALSE] + posterior::default_convergence_measures(), + MCSE = posterior::mcse_mean + ) + out <- datawizard::data_rename( + as.data.frame(out), + c( + Parameter = "variable", + ESS = "ess_bulk", + ESS_tail = "ess_tail", + Rhat = "rhat" + ) + ) + + out[!grepl("^lp_", out$Parameter), c("Parameter", diagnostic), drop = FALSE] } diff --git a/R/mcse.R b/R/mcse.R index 463222452..4109e76e8 100644 --- a/R/mcse.R +++ b/R/mcse.R @@ -113,4 +113,4 @@ mcse.blavaan <- mcse.stanreg #' @export mcse.CmdStanFit <- function(model, ...) { diagnostic_posterior(model, diagnostic = "MCSE") -} \ No newline at end of file +} diff --git a/R/utils_check_collinearity.R b/R/utils_check_collinearity.R index 0ed087c42..7710ecd51 100644 --- a/R/utils_check_collinearity.R +++ b/R/utils_check_collinearity.R @@ -2,7 +2,9 @@ .check_multicollinearity <- function(model, method = "equivalence_test", threshold = 0.7, ...) { - if (inherits(model, "CmdStanFit")) return() + if (inherits(model, "CmdStanFit")) { + return() + } valid_parameters <- insight::find_parameters( model, diff --git a/R/utils_clean_stan_parameters.R b/R/utils_clean_stan_parameters.R index 78d87d558..50ff5bf66 100644 --- a/R/utils_clean_stan_parameters.R +++ b/R/utils_clean_stan_parameters.R @@ -46,6 +46,3 @@ } .get_cleaned_parameters.CmdStanFit <- .get_cleaned_parameters.stanfit - - - From 90f6a651ae0085049139f85623d1abb437749c3c Mon Sep 17 00:00:00 2001 From: "Mattan S. Ben-Shachar" <35330040+mattansb@users.noreply.github.com> Date: Fri, 17 Oct 2025 13:35:59 +0300 Subject: [PATCH 18/36] Update R/bayesfactor-methods.R Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> --- R/bayesfactor-methods.R | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/R/bayesfactor-methods.R b/R/bayesfactor-methods.R index 52b930132..34e299dac 100644 --- a/R/bayesfactor-methods.R +++ b/R/bayesfactor-methods.R @@ -99,7 +99,9 @@ print.bayesfactor_matrix <- function(x, log = FALSE, ...) { # Format values x <- unclass(x) if (log) { - if (!orig_log) x <- log(x) + if (!orig_log) { + x <- log(x) + } sgn <- sign(x) < 0 x <- insight::format_value(abs(x), digits = 2, ...) From ed52e0790f85a1d70142b9097cca882c043fb220 Mon Sep 17 00:00:00 2001 From: "Mattan S. Ben-Shachar" <35330040+mattansb@users.noreply.github.com> Date: Fri, 17 Oct 2025 13:36:17 +0300 Subject: [PATCH 19/36] Update R/bayesfactor-methods.R Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> --- R/bayesfactor-methods.R | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/R/bayesfactor-methods.R b/R/bayesfactor-methods.R index 34e299dac..40f8ca077 100644 --- a/R/bayesfactor-methods.R +++ b/R/bayesfactor-methods.R @@ -111,7 +111,9 @@ print.bayesfactor_matrix <- function(x, log = FALSE, ...) { diag(x) <- "0" } else { - if (orig_log) x <- exp(x) + if (orig_log) { + x <- exp(x) + } x <- insight::format_bf(x, name = NULL, exact = TRUE, ...) diag(x) <- "1" From b22cc5220980a7012e9c8623c82b153c09a95c59 Mon Sep 17 00:00:00 2001 From: "Mattan S. Ben-Shachar" Date: Mon, 10 Nov 2025 10:09:17 +0200 Subject: [PATCH 20/36] save draft and changes --- R/bayesfactor-methods.R | 36 ++- R/bayesfactor_models.R | 3 + R/print.R | 2 +- R/print_html.R | 2 +- R/print_md.R | 2 +- R/weighted_posteriors.R | 28 +- vignettes/bayes_factors[WIP].Rmd | 521 +++++++++++++++++++++++++++++++ vignettes/bibliography.bib | 9 + 8 files changed, 574 insertions(+), 29 deletions(-) create mode 100644 vignettes/bayes_factors[WIP].Rmd diff --git a/R/bayesfactor-methods.R b/R/bayesfactor-methods.R index 40f8ca077..e596847b9 100644 --- a/R/bayesfactor-methods.R +++ b/R/bayesfactor-methods.R @@ -86,13 +86,14 @@ as.matrix.bayestestRBF <- function(x, log = TRUE, ...) { } class(out) <- c("bayesfactor_matrix", class(out)) + attr(out, "model_names") <- rownames(x) attr(out, "log_BF") <- log attr(out, "bf_fun") <- bf_fun out } #' @export -print.bayesfactor_matrix <- function(x, log = FALSE, ...) { +print.bayesfactor_matrix <- function(x, log = FALSE, show_names = FALSE, ...) { orig_x <- x orig_log <- attr(x, "log_BF") @@ -124,14 +125,22 @@ print.bayesfactor_matrix <- function(x, log = FALSE, ...) { # Model names models <- colnames(df) models[models == "1"] <- "(Intercept only)" - models <- paste0("[", seq_along(models), "] ", models) + + if (show_names && !is.null(attr(orig_x, "model_names"))) { + model_names <- attr(orig_x, "model_names") + if (attr(orig_x, "bf_fun") == "bayesfactor_restricted()") { + model_names <- c(1, model_names) + } + } else { + model_names <- seq_along(models) + } + + rowmodels <- paste0("[", model_names, "] ", models) + colmodels <- c("Denominator\\Numerator", paste0(" [", model_names, "] ")) rownames(df) <- colnames(df) <- NULL - df <- cbind(modl = models, df) - colnames(df) <- c( - "Denominator\\Numerator", - paste0(" [", seq_along(models), "] ") - ) + df <- cbind(modl = rowmodels, df) + colnames(df) <- colmodels # caption and footer caption <- switch(attr(orig_x, "bf_fun"), @@ -180,14 +189,15 @@ update.bayesfactor_models <- function(object, subset = NULL, reference = NULL, . if (all(subset < 0)) { subset <- seq_len(nrow(object))[subset] } - object_subset <- object[subset, ] - if (denominator %in% subset) { - attr(object_subset, "denominator") <- which(denominator == subset) - } else { - object_subset <- rbind(object[denominator, ], object_subset) - attr(object_subset, "denominator") <- 1 + subset <- unique(c(denominator, subset)) + object_subset <- datawizard::data_filter(object, subset) + + model_names <- attr(object, "model_names") + if (!is.null(model_names)) { + attr(object_subset, "model_names") <- model_names[subset] } + attr(object_subset, "denominator") <- 1 object <- object_subset } object diff --git a/R/bayesfactor_models.R b/R/bayesfactor_models.R index 0ce6332c4..82a0b1605 100644 --- a/R/bayesfactor_models.R +++ b/R/bayesfactor_models.R @@ -451,6 +451,9 @@ bayesfactor_models.BFBayesFactor <- function(..., verbose = TRUE) { if (!is.null(res$log_BF) && all(is.na(res$log_BF))) { insight::format_error("Could not calculate Bayes Factor for these models. You may report this problem at {https://github.com/easystats/bayestestR/issues/}.") # nolint } + + if (is.null(model_names)) model_names <- rownames(res) + attr(res, "denominator") <- denominator attr(res, "BF_method") <- bf_method attr(res, "unsupported_models") <- unsupported_models diff --git a/R/print.R b/R/print.R index 690a42217..9cf9a5345 100644 --- a/R/print.R +++ b/R/print.R @@ -179,7 +179,7 @@ print.bayestestR_si <- function(x, print.bayesfactor_models <- function(x, digits = 3, log = FALSE, - show_names = TRUE, + show_names = FALSE, caption = "Bayes Factors for Model Comparison", ...) { show_names <- show_names & !attr(x, "unsupported_models") diff --git a/R/print_html.R b/R/print_html.R index 770fc7b06..9d765275b 100644 --- a/R/print_html.R +++ b/R/print_html.R @@ -94,7 +94,7 @@ print_html.bayestestR_si <- function(x, digits = 2, caption = "Support Interval" print_html.bayesfactor_models <- function(x, digits = 3, log = FALSE, - show_names = TRUE, + show_names = FALSE, caption = "Bayes Factors for Model Comparison", ...) { .print_bf_html_default( diff --git a/R/print_md.R b/R/print_md.R index 803f1c499..d58e8b916 100644 --- a/R/print_md.R +++ b/R/print_md.R @@ -94,7 +94,7 @@ print_md.bayestestR_si <- function(x, digits = 2, caption = "Support Interval", print_md.bayesfactor_models <- function(x, digits = 3, log = FALSE, - show_names = TRUE, + show_names = FALSE, caption = "Bayes Factors for Model Comparison", ...) { .print_bf_md_default( diff --git a/R/weighted_posteriors.R b/R/weighted_posteriors.R index a448e1bff..dd274550a 100644 --- a/R/weighted_posteriors.R +++ b/R/weighted_posteriors.R @@ -157,11 +157,7 @@ weighted_posteriors.data.frame <- function(..., prior_odds = NULL, missing = 0, weighted_samps <- round(iterations * Probs) # pass to .weighted_posteriors - res <- .weighted_posteriors(Mods, weighted_samps, missing) - - # make weights table - attr(res, "weights") <- data.frame(Model = mnames, weights = weighted_samps) - return(res) + .weighted_posteriors(Mods, weighted_samps, missing, mnames) } @@ -175,6 +171,7 @@ weighted_posteriors.stanreg <- function(..., component = "conditional", parameters = NULL) { Mods <- list(...) + mnames <- sapply(match.call(expand.dots = FALSE)$`...`, insight::safe_deparse) # Get Bayes factors BFMods <- bayesfactor_models(..., denominator = 1, verbose = verbose) @@ -194,9 +191,7 @@ weighted_posteriors.stanreg <- function(..., parameters = parameters ) - res <- .weighted_posteriors(params, weighted_samps, missing) - attr(res, "weights") <- data.frame(Model = BFMods$Model, weights = weighted_samps) - return(res) + .weighted_posteriors(params, weighted_samps, missing, mnames) } #' @export @@ -253,14 +248,19 @@ weighted_posteriors.BFBayesFactor <- function(..., params <- lapply(params, data.frame) - res <- .weighted_posteriors(params, weighted_samps, missing) - attr(res, "weights") <- data.frame(Model = BFMods$Model, weights = weighted_samps) - return(res) + .weighted_posteriors(params, weighted_samps, missing, BFMods$Model) } -.weighted_posteriors <- function(params, weighted_samps, missing) { +.weighted_posteriors <- function(params, weighted_samps, missing, mnames) { par_names <- unique(unlist(sapply(params, colnames), recursive = TRUE)) + # Table of weights + weights <- data.frame( + Model = mnames, + weights = weighted_samps, + pweights = weighted_samps / sum(weighted_samps) + ) + # remove empty (0 sample) models params <- params[weighted_samps != 0] weighted_samps <- weighted_samps[weighted_samps != 0] @@ -278,7 +278,9 @@ weighted_posteriors.BFBayesFactor <- function(..., } # combine all - do.call("rbind", params) + res <- do.call("rbind", params) + attr(res, "weights") <- weights + return(res) } #' @keywords internal diff --git a/vignettes/bayes_factors[WIP].Rmd b/vignettes/bayes_factors[WIP].Rmd new file mode 100644 index 000000000..b1078b5fe --- /dev/null +++ b/vignettes/bayes_factors[WIP].Rmd @@ -0,0 +1,521 @@ +--- +title: "Bayes Factors" +output: + rmarkdown::html_vignette: + toc: true + toc_depth: 2 + fig_width: 10.08 + fig_height: 6 +tags: [r, bayesian, bayes factors] +vignette: > + \usepackage[utf8]{inputenc} + %\VignetteIndexEntry{Bayes Factors} + %\VignetteEngine{knitr::rmarkdown} +editor_options: + chunk_output_type: console +bibliography: bibliography.bib +csl: apa.csl +--- + +This vignette can be referred to by citing the following: + +- Makowski, D., Ben-Shachar, M. S., \& Lüdecke, D. (2019). *bayestestR: Describing Effects and their Uncertainty, Existence and Significance within the Bayesian Framework*. Journal of Open Source Software, 4(40), 1541. https://doi.org/10.21105/joss.01541 + +- Makowski, D., Ben-Shachar, M. S., Chen, S. H. A., \& Lüdecke, D. (2019). *Indices of Effect Existence and Significance in the Bayesian Framework*. Retrieved from [10.3389/fpsyg.2019.02767](https://doi.org/10.3389/fpsyg.2019.02767) + +--- + +```{r setup, include=FALSE} +library(knitr) + +options(knitr.kable.NA = "", digits = 2) + +knitr::opts_chunk$set( + echo = TRUE, + comment = ">", + out.width = "100%", + message = FALSE, + warning = FALSE, + dpi = 150 +) + +pkgs <- c( + "bayestestR", + "ggplot2", "see", + "rstanarm", "bridgesampling" + # "BayesFactor", "emmeans", "logspline", "lme4", "ggplot2", + # "see", "insight", "knitr", "effectsize", + +) +if (!all(sapply(pkgs, require, quietly = TRUE, character.only = TRUE))) { + knitr::opts_chunk$set(eval = FALSE) +} else { + theme_set(theme_modern()) +} +``` + +The adoption of the Bayesian framework, especially in the +social and psychological sciences, seems to be developing in two distinct directions, +with their separation often marked by their opinion of the **Bayes factor**. +In short, one school of thought (e.g., the *Amsterdam school*, led by [E. J. Wagenmakers](https://www.bayesianspectacles.org/)) advocate its use, and emphasize its qualities as a statistical index, +while another point to its limits and prefer, instead, the precise description of posterior distributions (using [CIs](https://easystats.github.io/bayestestR/reference/hdi.html), [ROPEs](https://easystats.github.io/bayestestR/reference/rope.html), etc.). + +The `bayestestR` package does **not** take a side in this debate, +and offers tools to carry out analysis irrespective of the school you subscribe to. +Instead, it strongly supports the notion of an *informed choice*: + +**discover the methods, learn about them, understand them, try them, and decide for yourself**. + +Having said that, here's an introduction to Bayes factors :) + +# The Bayes Factor + +There are many ways to conceptualize Bayes factors (BFs), but fundamentally: +**BFs are indices of *relative* evidence between two *hypotheses* over another**. + +According to Bayes' theorem, +given a the prior probability of some hypothesis $\mathcal{H}$ ($P(\mathcal{H})$) +and some data $D$, +we can find the posterior probability of the hypothesis ($P(\mathcal{H}|D)$) +by accounting for the probability of observing that datum were the hypothesis true ($P(D|\mathcal{H})$, +also known as the *likelihood*):^[normalized by the marginal probability of observing the data, ($P(D)$, which we will soon see is often not needed.] + +$$ +P(\mathcal{H}|D) = \frac{P(D|\mathcal{H})\times P(\mathcal{H})}{P(D)} +$$ + +Within this context, an hypothesis is formalized through the specification of an a-priori model: +Priors on the parameters ($\Theta$) the define the data generating process. + +If we have two hypothesis, we can find their posterior probability-odds as such: + +$$ +\underbrace{\frac{P(\mathcal{H}_1|D)}{P(\mathcal{H}_2|D)}}_{\text{Posterior Odds}} = +\underbrace{\frac{P(D|\mathcal{H}_1)}{P(D|\mathcal{H}_2)}}_{\text{Likelihood Ratio}} +\times +\underbrace{\frac{P(\mathcal{H}_1)}{P(\mathcal{H}_2)}}_{\text{Prior Odds}} +$$ + +Where the *likelihood ratio* (the middle term) is the *Bayes factor* - +it is the ***factor*** by which some **prior odds** have been updated _after_ observing the data to **posterior odds**. + +Thus, Bayes factors can be calculated in two ways: {#bf-definitions} + +- As a ratio quantifying **the relative probability of the observed data under each of the two hypotheses**: (In some contexts, these probabilities are also called *marginal likelihoods*.) + +$$ +BF_{12}=\frac{P(D|\mathcal{H}_1)}{P(D|\mathcal{H}_2)} +$$ + +- As **the degree of shift in prior beliefs** about the relative credibility of +two hypotheses (since they can be computed by dividing posterior odds by prior +odds). + +$$ +BF_{12}=\frac{\text{Posterior Odds}_{12}}{\text{Prior Odds}_{12}} +$$ + + + + + +Here we provide functions for computing Bayes factors in two different contexts: + +- **Comparing statistical models that differ on their priors, representing two competing hypotheses** +- **Testing single estimates (parameters, coefficients, transformed parameters) within a given model** + +# 1. Comparing Models using Bayes Factors {#bayesfactor_models} + +Let's take a look at the _kid IQ_ dataset from the `{rstanarm}` package. + +```{r} +data("kidiq", package = "rstanarm") + +kidiq <- kidiq[c("kid_score", "mom_hs")] +kidiq$mom_hs <- factor(kidiq$mom_hs, levels = 0:1, labels = c("no", "yes")) + +head(kidiq) +``` + +We'll be typing to answer a simple question: +what is the mean difference in IQ scores between children whose mothers completed high-school and those whose mothers did not complete high school (as indicated by the `mom_hs` variable). + +There are many hypothesis we might have about this difference. Let's start by examining: + +- $\mathcal{H}_0$: There's no difference in IQ between the two groups. +- $\mathcal{H}_1$: The difference is probably around 20 point in favor of kids whose mothers completed high school. +- $\mathcal{H}_2$: A more conservative hypothesis that the difference, if it exists, is probably no more than about 5 point in either direction. + +Let's plot these: + +```{r, echo=FALSE} +p_prior0 <- ggplot() + + geom_vline(xintercept = 0, linetype = "dashed") + + geom_segment(aes(x = 0, xend = 0, y = 0, yend = 1), + linewidth = 1, color = "royalblue") + + geom_point(aes(x = 0, y = 1), + size = 3, color = "royalblue") + + theme(axis.text.y = element_blank()) + + expand_limits(y = 1.5, x = c(-50, 50)) + + labs( + x = "Difference", + y = NULL, + title = expression(H[0]~":"~theta==0) + ) + +p_prior1 <- ggplot() + + stat_function( + geom = "area", + fun = dnorm, + args = list(mean = 20, sd = 10), + xlim = c(-50, 50), + fill = "royalblue", + ) + + geom_vline(xintercept = 0, linetype = "dashed") + + theme(axis.text.y = element_blank()) + + expand_limits(y = 0.05) + + labs( + x = "Difference", + y = NULL, + title = expression(H[1]~":"~theta%~%norma(20, 10^2)) + ) + +p_prior2 <- ggplot() + + stat_function( + geom = "area", + fun = dnorm, + args = list(mean = 0, sd = 5), + xlim = c(-50, 50), + fill = "royalblue", + ) + + geom_vline(xintercept = 0, linetype = "dashed") + + theme(axis.text.y = element_blank()) + + expand_limits(y = 0.05) + + labs( + x = "Difference", + y = NULL, + title = expression(H[2]~":"~theta%~%norma(0, 5^2)) + ) + +plots(p_prior0, p_prior1, p_prior2, n_columns = 1) + +``` + +We can build models with these different priors with `{brms}` or `{rstanarm}`:^[We will be using `{rstanarm}` throughout this vignette, but `bayestestR` also supports `{brms}`, `{blavaan}`, `{rstan}`, `{cmdstanr}`, `{BayesFactor}` and more.] + +In any case, note the we will always require _many_ posterior samples for the stability of our BF estimation (typically 10 times more than what we would need for posterior estimation alone; @gronau2020bridgesampling). + +```{r} +mod_H0 <- stan_glm( + kid_score ~ 1, + family = gaussian(), + data = kidiq, + + diagnostic_file = file.path(tempdir(), "df0.csv"), # required for BF computation + + chains = 10, iter = 5000, warmup = 1000, + refresh = 0 +) + +mod_H1 <- stan_glm( + kid_score ~ mom_hs, + family = gaussian(), + data = kidiq, + + prior = normal(location = 20, scale = 10), + diagnostic_file = file.path(tempdir(), "df1.csv"), + + chains = 10, iter = 5000, warmup = 1000, + refresh = 0 +) + +mod_H2 <- stan_glm( + kid_score ~ mom_hs, + family = gaussian(), + data = kidiq, + + prior = normal(location = 0, scale = 5), + diagnostic_file = file.path(tempdir(), "df2.csv"), + + chains = 10, iter = 5000, warmup = 1000, + refresh = 0 +) +``` + +We can now ask: which a-priori model (each representing a different hypothesis) is more likely to have produced the observed data? + +This is usually done by comparing the marginal likelihoods of two models. In +such a case, the Bayes factor is a measure of the **relative** evidence for one +hypothesis over the other. + +```{r} +bfs <- bayesfactor_models(mod_H1, mod_H2, denominator = mod_H0, verbose = FALSE) + +print(bfs, show_names = TRUE) +``` + + +We can see that the both models that allow for a difference between the groups +are much more supported by the data - +with $BF>`r insight::format_value(exp(bfs$log_BF[2]))`$ - +compared to the null (intercept only). + +Due to the transitive property of Bayes factors, +we can easily change the reference model to the model representing $\mathcal{H}_2$: + +```{r update_models1} +bfs2 <- update(bfs, reference = 2, subset = 1) + +print(bfs2, show_names = TRUE) +``` + +As we can see, the data supports the a-priori model that suggests a positive difference almost 4 times over the model that suggests a small difference. + +We can also get a matrix of Bayes factors of all the pairwise model comparisons: + +```{r} +print(as.matrix(bfs), show_names = TRUE) +``` + +Overall, we can see that both models that allow for some non-0 difference are much more supported by the data compared to the 0-difference model. Let's take a look at the data: + +```{r, echo=FALSE} +ggplot(kidiq, aes(mom_hs, kid_score, fill = mom_hs, color = mom_hs)) + + geom_violindot() + + geom_boxplot(fill = NA, position = position_nudge(-0.2), width = 0.1) + + labs(x = "Mom completed high-school?", y = "Kids' IQ") + + guides(fill = "none", color = "none") +``` + +And indeed both models 1 and 2's posteriors reflect this difference: + +```{r, echo=FALSE} +plots( + plot(hdi(mod_H1)) + + labs(y = NULL, title = "Model 1") + + expand_limits(x = c(-10, 30)) + + scale_y_discrete(expand = expansion(0.1, 0)) + + guides(fill = "none"), + plot(hdi(mod_H2)) + + expand_limits(x = c(-10, 30)) + + scale_y_discrete(expand = expansion(0.1, 0)) + + labs(y = NULL, title = "Model 2"), + + n_columns = 1, + guides = "collect" +) +``` + +Note that these posterior distributions are _very_ similar, +but BFs do not compare posterior models - only _a-priori_ models! + +For this reason, computing BFs only makes sense if we are able to formulate our hypotheses +into distinct priors. + +## The BIC approximation + +It is also possible to compute approximate Bayes factors for the comparison of frequentist models. +This is done by comparing BIC indices, allowing a Bayesian comparison +of nested as well as non-nested frequentist models [@wagenmakers2007practical]. + +Since frequentist modeling does not allow for specification of priors, we are limited to either restricting parameters to 0 or not. + +```{r} +mod_H0f <- lm(kid_score ~ 1, data = kidiq) + +mod_H1f <- lm(kid_score ~ mom_hs, data = kidiq) + +bayesfactor_models(mod_H1f, denominator = mod_H0f) +``` + +(Note how similar this approximate BF is to the proper BFs estimated above.) + + +## Model averaging + +In the previous section, we discussed the direct comparison of two models to +determine if an hypothesis is supported by the data. +However, in many cases there are too many models to consider, +or perhaps it is not straightforward which models we should be comparing to determine if an effect is supported by the data. +For such cases, we can use Bayesian model averaging (BMA) to determine the support +provided by the data for a parameter or model-term across many models. + +### Inclusion Bayes factors {#bayesfactor_inclusion} + +Inclusion Bayes factors answer the question: + +> **Are the observed data more probable under models with a particular predictor, than they are under models without that particular predictor?** + +In other words, on average, are models with predictor $X$ more likely to have +produced the observed data than models without predictor $X$?^[A model without +predictor $X$ can be thought of as a model in which the parameter(s) of the +predictor have been restricted to a null-point of 0.] + +These Bayes factors are computed not as the ratios of marginal likelihoods, +but as **the degree of shift in prior beliefs**: +Since each model has a prior probability, it is possible to sum the prior +probability of all models that include a predictor of interest (the *prior inclusion probability*), and of all models that do not include that predictor +(the *prior exclusion probability*). After the data are observed, and each model +is assigned a posterior probability, we can similarly consider the sums of the +posterior models' probabilities to obtain the *posterior inclusion probability* +and the *posterior exclusion probability*. The change from prior +inclusion odds to the posterior inclusion odds is the **Inclusion Bayes factor** +["$BF_{Inclusion}$"; @clyde2011bayesian]. + +```{r} +(bfinc <- bayesfactor_inclusion(bfs)) +``` + +We can see that across the 3 models under consideration, models _with_ the `mom_hs` term fit the data `r insight::format_value(exp(bfinc$log_BF))` times more than the model _without_ that term. + +### Averaging posteriors {#weighted_posteriors} + +Similar to how we can average evidence for a predictor across models, we can +also average the **posterior estimate** across models. + +```{r} +ppp <- weighted_posteriors(mod_H0, mod_H1, mod_H2) + +plot(hdi(ppp$mom_hsyes)) +``` + +This looks a lot like the posterior obtained from the second model, which shouldn't be surprising since about 80% of the averaged posterior comes from the second model. + +```{r} +attr(ppp, "weights") +``` + + +## Order restricted models {#bayesfactor_restricted} + +We've already seen we can formalize hypothesis into distributional priors +(e.g., _the difference is probably no more than about 5 point in either direction._ became $theta \sim Normal(0, 5^2)$). +These priors are **unrestricted** - that is, **all values** between $-\infty$ +and $\infty$ of all parameters in the model have some non-zero credibility (no +matter how small; this is true for both the prior and posterior distribution). + +But we can also formalize hypotheses as **order restrictions** [@morey_2015_blog; +@morey2011bayesinterval]. + +For example, we can impose an _additional_ order restriction +that the difference _must be positive_, which we can write like this (if we had to): + +$$ +\mathcal{H}_{2r}: theta \sim Normal(0, 5^2)\begin{bmatrix} \infty \\ 0 \end{bmatrix} +$$ + +By testing the probabilities of these restrictions on prior and posterior samples, +we can see how the probabilities of the restricted distributions change after observing the data. +This can be achieved with `bayesfactor_restricted()`, that compute a Bayes +factor for these restricted model vs the unrestricted model. + +```{r} +bayesfactor_restricted(mod_H2, hypothesis = "mom_hsyes > 0") +``` + + +In other words, the data fits the restricted model (where the difference must be small _and positive_) twice as much as it fits the un-restircted model (where the difference must be small). + +We can compare multiple restricted hypotheses. For example: that the difference isn't just positive, it's larger than 5. + +```{r} +bf_rstr2 <- bayesfactor_restricted(mod_H2, hypothesis = c( + positive = "mom_hsyes > 0", + strong = "mom_hsyes > 4" +)) + +``` + +Here too we can obtain a matrix of BFs between all models: + +```{r} +print(as.matrix(bf_rstr2), show_names = TRUE) +``` + +We can see the "strong" model is preferred over both the un-restricted model and the "positive" model. + + +Again, we can use the transitive properties of Bayes factors to find the BF comparing $\mathcal{H}_{2r}$ and $\mathcal{H}_1$: + +$$ +BF_{2r,1} = BF_{2,0} \times BF_{2r,2} = \frac{P(D|\mathcal{H}_{2})}{P(D|\mathcal{H}_0)} \times \frac{P(D|\mathcal{H}_{2r})}{P(D|\mathcal{H}_2)} = \frac{P(D|\mathcal{H}_{2r})}{P(D|\mathcal{H}_0)} +$$ + +```{r} +BF_2.0 <- as.numeric(bfs)[2] +BF_2r.2 <- as.numeric(bf_rstr) + +(BF_2r.0 <- BF_2.0 * BF_2r.2) +``` + +So the data support the hypothesis that the difference is small but strictly positive +`r insight::format_value(BF_2r.0)` times more than the hypothesis that the difference is exactly 0. + +**Because these restrictions are on the prior distribution, they are only appropriate for testing pre-planned (*a priori*) hypotheses, and should not be used for any post hoc comparisons [@morey_2015_blog].** + +--- + +We are not limited to a single order restrictions - we can compound them to create complex restrictions. + +Let's look at the [`disgust` dataset](http://bayesfactor.blogspot.com/2015/01/multiple-comparisons-with-bayesfactor-2.html), were 150 individuals rated "moral harshness" of undocumented migrants in one of three conditions: no odor, clean odor (lemon), or disgusting (sulfur) odor during questionnaire. + +```{r} +data("disgust", package = "bayestestR") + +str(disgust) +``` + + +Let's build our simple one-way-ANOVA-like model: + +```{r} +mod_odor <- stan_glm( + score ~ condition, + family = gaussian(), + data = disgust, + + prior = normal(location = 0, scale = 4), + diagnostic_file = file.path(tempdir(), "df2.csv"), + + chains = 10, iter = 5000, warmup = 1000, + refresh = 0 +) +``` + +```{r} +hyps <- c( + "control < lemon" = "conditionlemon < 0", + "control < lemon" = "conditionlemon < 0", +) + +bayesfactor_restricted(mod_odor, hypothesis = hyps) +``` + + +**NOTE**: See the *Specifying Correct Priors for Factors with More Than 2 Levels* appendix below. + + +# 2. Testing Models' Parameters with Bayes Factors {#bayesfactor_parameters} + +### Testing against a null-*region* + +### Directional hypotheses + +### Support intervals and curves {#si} + +A continuous extension of the density ratio... + +# Appendices + + +```{r} + +``` + +## Specifying correct priors for factors {#contr_bayes} + +## Contrasts (and marginal means) + + +# References + diff --git a/vignettes/bibliography.bib b/vignettes/bibliography.bib index a75a92bb1..24b1352a4 100644 --- a/vignettes/bibliography.bib +++ b/vignettes/bibliography.bib @@ -469,4 +469,13 @@ @article{van2019cautionary author={van den Bergh, Don and Haaf, Julia M and Ly, Alexander and Rouder, Jeffrey N and Wagenmakers, Eric-Jan}, year={2019}, publisher={PsyArXiv} +} + +@article{gronau2020bridgesampling, + title={bridgesampling: An R package for estimating normalizing constants}, + author={Gronau, Quentin F and Singmann, Henrik and Wagenmakers, Eric-Jan}, + journal={Journal of Statistical Software}, + volume={92}, + pages={1--29}, + year={2020} } \ No newline at end of file From 66b8a7cca3b8d325a97195d3e43b50df86cc9419 Mon Sep 17 00:00:00 2001 From: Daniel Date: Mon, 4 May 2026 08:00:29 +0200 Subject: [PATCH 21/36] Update R/bayesfactor_models.R Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> --- R/bayesfactor_models.R | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/R/bayesfactor_models.R b/R/bayesfactor_models.R index 82a0b1605..f28e80848 100644 --- a/R/bayesfactor_models.R +++ b/R/bayesfactor_models.R @@ -279,7 +279,11 @@ bayesfactor_models.default <- function(..., denominator = 1, verbose = TRUE) { } else { res <- .bayesfactor_models_stan_REG(mods, denominator, verbose) bf_method <- "marginal likelihoods (bridgesampling)" - unsupported_models <- if (inherits(mods[[1]], c("stanfit", "CmdStanFit"))) TRUE else FALSE + unsupported_models <- if (inherits(mods[[1]], c("stanfit", "CmdStanFit"))) { + TRUE + } else { + FALSE + } } .bf_models_output(res, From 927bec639332be17be9a1be09742a2205346c2a4 Mon Sep 17 00:00:00 2001 From: Daniel Date: Mon, 4 May 2026 08:00:58 +0200 Subject: [PATCH 22/36] Update R/bayesfactor_models.R Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> --- R/bayesfactor_models.R | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/R/bayesfactor_models.R b/R/bayesfactor_models.R index f28e80848..eb6214eb2 100644 --- a/R/bayesfactor_models.R +++ b/R/bayesfactor_models.R @@ -286,7 +286,8 @@ bayesfactor_models.default <- function(..., denominator = 1, verbose = TRUE) { } } - .bf_models_output(res, + .bf_models_output( + res, denominator = denominator, bf_method = bf_method, unsupported_models = unsupported_models From bc3ce1d2b9124160665ec4f5aa7850446940c6d1 Mon Sep 17 00:00:00 2001 From: Daniel Date: Mon, 4 May 2026 08:19:23 +0200 Subject: [PATCH 23/36] Update R/bayesfactor_models.R Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> --- R/bayesfactor_models.R | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/R/bayesfactor_models.R b/R/bayesfactor_models.R index eb6214eb2..35f8d5e70 100644 --- a/R/bayesfactor_models.R +++ b/R/bayesfactor_models.R @@ -312,7 +312,9 @@ bayesfactor_models.default <- function(..., denominator = 1, verbose = TRUE) { insight::format_error("Models were not computed from the same data.") } } else if (verbose) { - insight::format_alert("Unable to validate that all models were fit with the same data.") + insight::format_alert( + "Unable to validate that all models were fit with the same data." + ) } mML <- lapply(mods, .get_marglik, verbose = verbose) From 337f21b1329714132c0f55492b1c585a9511a74e Mon Sep 17 00:00:00 2001 From: Daniel Date: Mon, 4 May 2026 08:27:45 +0200 Subject: [PATCH 24/36] air styler --- DESCRIPTION | 2 +- NAMESPACE | 1 - R/bayesfactor-methods.R | 9 +- R/bayesfactor_models.R | 87 +++++++---- R/bayesfactor_parameters.R | 186 +++++++++++++---------- R/bayesfactor_restricted.R | 76 +++++---- R/diagnostic_posterior.R | 8 +- R/print.R | 123 ++++++++------- R/print_html.R | 101 ++++++++---- R/print_md.R | 93 ++++++++---- R/si.R | 124 +++++++++++---- R/spi.R | 109 ++++++++----- R/utils_bayesfactor.R | 73 ++++----- R/utils_check_collinearity.R | 25 ++- R/weighted_posteriors.R | 52 ++++--- man/bayesfactor_parameters.Rd | 28 +--- man/contr.equalprior.Rd | 2 +- tests/testthat/test-bayesfactor_models.R | 37 +++-- 18 files changed, 712 insertions(+), 424 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 1680e57b0..9e765d5ce 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Type: Package Package: bayestestR Title: Understand and Describe Bayesian Models and Posterior Distributions -Version: 0.17.0.3 +Version: 0.17.0.4 Authors@R: c(person(given = "Dominique", family = "Makowski", diff --git a/NAMESPACE b/NAMESPACE index fb2aabfde..9209b8452 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -588,7 +588,6 @@ S3method(sexit_thresholds,mlm) S3method(sexit_thresholds,stanreg) S3method(sexit_thresholds,wbm) S3method(sexit_thresholds,zeroinfl) -S3method(si,CmdStanFit) S3method(si,blavaan) S3method(si,brmsfit) S3method(si,comparisons) diff --git a/R/bayesfactor-methods.R b/R/bayesfactor-methods.R index e596847b9..fd15175d1 100644 --- a/R/bayesfactor-methods.R +++ b/R/bayesfactor-methods.R @@ -143,7 +143,8 @@ print.bayesfactor_matrix <- function(x, log = FALSE, show_names = FALSE, ...) { colnames(df) <- colmodels # caption and footer - caption <- switch(attr(orig_x, "bf_fun"), + caption <- switch( + attr(orig_x, "bf_fun"), "bayesfactor_restricted()" = "# Bayes Factors for Restricted Models", "# Bayes Factors for Model Comparison" ) @@ -210,8 +211,10 @@ update.bayesfactor_models <- function(object, subset = NULL, reference = NULL, . #' @export as.numeric.bayestestRBF <- function(x, log = FALSE, ...) { out <- x[["log_BF"]] - if (!log) out <- exp(out) - return(out) + if (!log) { + out <- exp(out) + } + out } #' @export diff --git a/R/bayesfactor_models.R b/R/bayesfactor_models.R index 35f8d5e70..b35425546 100644 --- a/R/bayesfactor_models.R +++ b/R/bayesfactor_models.R @@ -179,12 +179,15 @@ bayesfactor_models.default <- function(..., denominator = 1, verbose = TRUE) { estimator <- mods[["estimator"]] check_response <- mods[["check_response"]] - if (is.null(estimator)) estimator <- "ML" - if (is.null(check_response)) check_response <- FALSE + if (is.null(estimator)) { + estimator <- "ML" + } + if (is.null(check_response)) { + check_response <- FALSE + } mods[["check_response"]] <- mods[["estimator"]] <- NULL cl$...$estimator <- cl$...$check_response <- NULL - names(mods) <- sapply(cl[["..."]], insight::safe_deparse) names(denominator) <- insight::safe_deparse(cl$denominator) @@ -216,10 +219,13 @@ bayesfactor_models.default <- function(..., denominator = 1, verbose = TRUE) { } # Get BIC - if (were_checked && estimator == "REML" && - any(vapply(mods, insight::is_mixed_model, TRUE)) && - !isTRUE(attr(model_objects, "same_fixef")) && - verbose) { + if ( + were_checked && + estimator == "REML" && + any(vapply(mods, insight::is_mixed_model, TRUE)) && + !isTRUE(attr(model_objects, "same_fixef")) && + verbose + ) { insight::format_warning(paste( "Information criteria (like BIC) based on REML fits (i.e. `estimator=\"REML\"`)", "are not recommended for comparison between models with different fixed effects.", @@ -227,18 +233,26 @@ bayesfactor_models.default <- function(..., denominator = 1, verbose = TRUE) { )) } } else if (verbose) { - insight::format_alert("Unable to validate that all models were fit with the same data.") + insight::format_alert( + "Unable to validate that all models were fit with the same data." + ) } - mBIC <- tryCatch(sapply(mods, function(m) { - LL <- insight::get_loglikelihood( - m, - estimator = estimator, check_response = check_response - ) - stats::BIC(LL) - }), error = function(...) NULL) + mBIC <- tryCatch( + sapply(mods, function(m) { + LL <- insight::get_loglikelihood( + m, + estimator = estimator, + check_response = check_response + ) + stats::BIC(LL) + }), + error = function(...) NULL + ) - if (is.null(mBIC)) mBIC <- sapply(mods, stats::BIC) + if (is.null(mBIC)) { + mBIC <- sapply(mods, stats::BIC) + } # Get BF mBFs <- bic_to_bf(mBIC, denominator = mBIC[denominator], log = TRUE) @@ -249,7 +263,8 @@ bayesfactor_models.default <- function(..., denominator = 1, verbose = TRUE) { stringsAsFactors = FALSE ) - .bf_models_output(res, + .bf_models_output( + res, denominator = denominator, bf_method = "BIC approximation", unsupported_models = !all(supported_models), @@ -262,7 +277,9 @@ bayesfactor_models.default <- function(..., denominator = 1, verbose = TRUE) { # Warn n_samps <- sapply(mods, function(x) { alg <- insight::find_algorithm(x) - if (is.null(alg$iterations)) alg$iterations <- alg$sample + if (is.null(alg$iterations)) { + alg$iterations <- alg$sample + } (alg$iterations - alg$warmup) * alg$chains }) if (any(n_samps < 4e4) && verbose) { @@ -410,7 +427,8 @@ bayesfactor_models.BFBayesFactor <- function(..., verbose = TRUE) { stringsAsFactors = FALSE ) - .bf_models_output(res, + .bf_models_output( + res, denominator = 1, bf_method = "JZS (BayesFactor)", unsupported_models = !inherits(models@denominator, "BFlinearModel") @@ -449,23 +467,34 @@ bayesfactor_models.BFBayesFactor <- function(..., verbose = TRUE) { #' @keywords internal -.bf_models_output <- function(res, - denominator = 1, - bf_method = "method", - unsupported_models = FALSE, - model_names = NULL) { +.bf_models_output <- function( + res, + denominator = 1, + bf_method = "method", + unsupported_models = FALSE, + model_names = NULL +) { # sanity check - are all BF NA? if (!is.null(res$log_BF) && all(is.na(res$log_BF))) { - insight::format_error("Could not calculate Bayes Factor for these models. You may report this problem at {https://github.com/easystats/bayestestR/issues/}.") # nolint + insight::format_error( + "Could not calculate Bayes Factor for these models. You may report this problem at {https://github.com/easystats/bayestestR/issues/}." + ) } - if (is.null(model_names)) model_names <- rownames(res) + if (is.null(model_names)) { + model_names <- rownames(res) + } attr(res, "denominator") <- denominator attr(res, "BF_method") <- bf_method attr(res, "unsupported_models") <- unsupported_models attr(res, "model_names") <- model_names - class(res) <- c("bayestestRBF", "bayesfactor_models", "see_bayesfactor_models", class(res)) + class(res) <- c( + "bayestestRBF", + "bayesfactor_models", + "see_bayesfactor_models", + class(res) + ) res } @@ -559,7 +588,9 @@ bayesfactor_models.BFBayesFactor <- function(..., verbose = TRUE) { # Else... Get marginal likelihood if (verbose) { - insight::format_alert("Computation of Marginal Likelihood: estimating marginal likelihood, please wait...") + insight::format_alert( + "Computation of Marginal Likelihood: estimating marginal likelihood, please wait..." + ) } # Should probably allow additional arguments such as reps or cores to for bridge_sampler bridgesampling::bridge_sampler(mod, silent = TRUE) diff --git a/R/bayesfactor_parameters.R b/R/bayesfactor_parameters.R index ae9a1d5a5..3520a3f6c 100644 --- a/R/bayesfactor_parameters.R +++ b/R/bayesfactor_parameters.R @@ -179,23 +179,27 @@ #' @family Bayes factors #' #' @export -bayesfactor_parameters <- function(posterior, - prior = NULL, - direction = "two-sided", - null = 0, - ..., - verbose = TRUE) { +bayesfactor_parameters <- function( + posterior, + prior = NULL, + direction = "two-sided", + null = 0, + ..., + verbose = TRUE +) { UseMethod("bayesfactor_parameters") } #' @rdname bayesfactor_parameters #' @export -bayesfactor_pointnull <- function(posterior, - prior = NULL, - direction = "two-sided", - null = 0, - ..., - verbose = TRUE) { +bayesfactor_pointnull <- function( + posterior, + prior = NULL, + direction = "two-sided", + null = 0, + ..., + verbose = TRUE +) { if (length(null) > 1L && verbose) { insight::format_alert("`null` is a range - computing a ROPE based Bayes factor.") } @@ -212,14 +216,18 @@ bayesfactor_pointnull <- function(posterior, #' @rdname bayesfactor_parameters #' @export -bayesfactor_rope <- function(posterior, - prior = NULL, - direction = "two-sided", - null = rope_range(posterior, verbose = FALSE), - ..., - verbose = TRUE) { +bayesfactor_rope <- function( + posterior, + prior = NULL, + direction = "two-sided", + null = rope_range(posterior, verbose = FALSE), + ..., + verbose = TRUE +) { if (length(null) < 2 && verbose) { - insight::format_alert("'null' is a point - computing a Savage-Dickey (point null) Bayes factor.") + insight::format_alert( + "'null' is a point - computing a Savage-Dickey (point null) Bayes factor." + ) } bayesfactor_parameters( @@ -246,12 +254,14 @@ bf_rope <- bayesfactor_rope #' @rdname bayesfactor_parameters #' @export -bayesfactor_parameters.numeric <- function(posterior, - prior = NULL, - direction = "two-sided", - null = 0, - ..., - verbose = TRUE) { +bayesfactor_parameters.numeric <- function( + posterior, + prior = NULL, + direction = "two-sided", + null = 0, + ..., + verbose = TRUE +) { # nm <- insight::safe_deparse(substitute(posterior) if (is.null(prior)) { @@ -268,9 +278,12 @@ bayesfactor_parameters.numeric <- function(posterior, # Get BFs sdbf <- bayesfactor_parameters.data.frame( - posterior = posterior, prior = prior, - direction = direction, null = null, - verbose = verbose, ... + posterior = posterior, + prior = prior, + direction = direction, + null = null, + verbose = verbose, + ... ) sdbf$Parameter <- NULL sdbf @@ -279,27 +292,36 @@ bayesfactor_parameters.numeric <- function(posterior, #' @rdname bayesfactor_parameters #' @export -bayesfactor_parameters.stanreg <- function(posterior, - prior = NULL, - direction = "two-sided", - null = 0, - effects = "fixed", - component = "conditional", - parameters = NULL, - ..., - verbose = TRUE) { +bayesfactor_parameters.stanreg <- function( + posterior, + prior = NULL, + direction = "two-sided", + null = 0, + effects = "fixed", + component = "conditional", + parameters = NULL, + ..., + verbose = TRUE +) { cleaned_parameters <- .get_cleaned_parameters(posterior, ...) - samps <- .clean_priors_and_posteriors(posterior, prior, - effects = effects, component = component, - parameters = parameters, verbose = verbose + samps <- .clean_priors_and_posteriors( + posterior, + prior, + effects = effects, + component = component, + parameters = parameters, + verbose = verbose ) # Get BFs temp <- bayesfactor_parameters.data.frame( - posterior = samps$posterior, prior = samps$prior, - direction = direction, null = null, - verbose = verbose, ... + posterior = samps$posterior, + prior = samps$prior, + direction = direction, + null = null, + verbose = verbose, + ... ) bf_val <- .prepare_output(temp, cleaned_parameters, inherits(posterior, "stanmvreg")) @@ -326,23 +348,26 @@ bayesfactor_parameters.stanfit <- bayesfactor_parameters.stanreg #' @export -bayesfactor_parameters.blavaan <- function(posterior, - prior = NULL, - direction = "two-sided", - null = 0, - ..., - verbose = TRUE) { +bayesfactor_parameters.blavaan <- function( + posterior, + prior = NULL, + direction = "two-sided", + null = 0, + ..., + verbose = TRUE +) { cleaned_parameters <- insight::clean_parameters(posterior) - samps <- .clean_priors_and_posteriors(posterior, prior, - verbose = verbose - ) + samps <- .clean_priors_and_posteriors(posterior, prior, verbose = verbose) # Get BFs temp <- bayesfactor_parameters.data.frame( - posterior = samps$posterior, prior = samps$prior, - direction = direction, null = null, - verbose = verbose, ... + posterior = samps$posterior, + prior = samps$prior, + direction = direction, + null = null, + verbose = verbose, + ... ) bf_val <- .prepare_output(temp, cleaned_parameters) @@ -359,12 +384,14 @@ bayesfactor_parameters.blavaan <- function(posterior, #' @export -bayesfactor_parameters.emmGrid <- function(posterior, - prior = NULL, - direction = "two-sided", - null = 0, - ..., - verbose = TRUE) { +bayesfactor_parameters.emmGrid <- function( + posterior, + prior = NULL, + direction = "two-sided", + null = 0, + ..., + verbose = TRUE +) { samps <- .clean_priors_and_posteriors( posterior, prior, @@ -399,13 +426,15 @@ bayesfactor_parameters.comparisons <- bayesfactor_parameters.emmGrid #' @rdname bayesfactor_parameters #' @inheritParams p_direction #' @export -bayesfactor_parameters.data.frame <- function(posterior, - prior = NULL, - direction = "two-sided", - null = 0, - rvar_col = NULL, - ..., - verbose = TRUE) { +bayesfactor_parameters.data.frame <- function( + posterior, + prior = NULL, + direction = "two-sided", + null = 0, + rvar_col = NULL, + ..., + verbose = TRUE +) { x_rvar <- .possibly_extract_rvar_col(posterior, rvar_col) if (length(x_rvar) > 0L) { cl <- match.call() @@ -443,7 +472,6 @@ bayesfactor_parameters.data.frame <- function(posterior, ) } - sdlogbf <- numeric(ncol(posterior)) for (par in seq_along(posterior)) { sdlogbf[par] <- .logbayesfactor_parameters( @@ -477,12 +505,14 @@ bayesfactor_parameters.data.frame <- function(posterior, #' @export -bayesfactor_parameters.draws <- function(posterior, - prior = NULL, - direction = "two-sided", - null = 0, - ..., - verbose = TRUE) { +bayesfactor_parameters.draws <- function( + posterior, + prior = NULL, + direction = "two-sided", + null = 0, + ..., + verbose = TRUE +) { bayesfactor_parameters( .posterior_draws_to_df(posterior), prior = if (!is.null(prior)) .posterior_draws_to_df(prior), @@ -498,11 +528,7 @@ bayesfactor_parameters.rvar <- bayesfactor_parameters.draws #' @keywords internal -.logbayesfactor_parameters <- function(posterior, - prior, - direction = 0, - null = 0, - ...) { +.logbayesfactor_parameters <- function(posterior, prior, direction = 0, null = 0, ...) { stopifnot(length(null) %in% c(1, 2)) if (isTRUE(all.equal(posterior, prior))) { diff --git a/R/bayesfactor_restricted.R b/R/bayesfactor_restricted.R index f0e7671ae..32fbb31c9 100644 --- a/R/bayesfactor_restricted.R +++ b/R/bayesfactor_restricted.R @@ -144,19 +144,27 @@ bf_restricted <- bayesfactor_restricted #' @rdname bayesfactor_restricted #' @export -bayesfactor_restricted.stanreg <- function(posterior, hypothesis, prior = NULL, - verbose = TRUE, - effects = "fixed", - component = "conditional", - ...) { - samps <- .clean_priors_and_posteriors(posterior, prior, - effects = effects, component = component, +bayesfactor_restricted.stanreg <- function( + posterior, + hypothesis, + prior = NULL, + verbose = TRUE, + effects = "fixed", + component = "conditional", + ... +) { + samps <- .clean_priors_and_posteriors( + posterior, + prior, + effects = effects, + component = component, verbose = verbose ) # Get savage-dickey BFs bayesfactor_restricted.data.frame( - posterior = samps$posterior, prior = samps$prior, + posterior = samps$posterior, + prior = samps$prior, hypothesis = hypothesis ) } @@ -173,15 +181,19 @@ bayesfactor_restricted.stanfit <- bayesfactor_restricted.stanreg #' @rdname bayesfactor_restricted #' @export -bayesfactor_restricted.blavaan <- function(posterior, hypothesis, prior = NULL, - verbose = TRUE, ...) { - samps <- .clean_priors_and_posteriors(posterior, prior, - verbose = verbose - ) +bayesfactor_restricted.blavaan <- function( + posterior, + hypothesis, + prior = NULL, + verbose = TRUE, + ... +) { + samps <- .clean_priors_and_posteriors(posterior, prior, verbose = verbose) # Get savage-dickey BFs bayesfactor_restricted.data.frame( - posterior = samps$posterior, prior = samps$prior, + posterior = samps$posterior, + prior = samps$prior, hypothesis = hypothesis ) } @@ -189,15 +201,18 @@ bayesfactor_restricted.blavaan <- function(posterior, hypothesis, prior = NULL, #' @rdname bayesfactor_restricted #' @export -bayesfactor_restricted.emmGrid <- function(posterior, hypothesis, prior = NULL, - verbose = TRUE, - ...) { - samps <- .clean_priors_and_posteriors(posterior, prior, - verbose = verbose - ) +bayesfactor_restricted.emmGrid <- function( + posterior, + hypothesis, + prior = NULL, + verbose = TRUE, + ... +) { + samps <- .clean_priors_and_posteriors(posterior, prior, verbose = verbose) bayesfactor_restricted.data.frame( - posterior = samps$posterior, prior = samps$prior, + posterior = samps$posterior, + prior = samps$prior, hypothesis = hypothesis ) } @@ -217,7 +232,13 @@ bayesfactor_restricted.comparisons <- bayesfactor_restricted.emmGrid #' @export #' @rdname bayesfactor_restricted #' @inheritParams p_direction -bayesfactor_restricted.data.frame <- function(posterior, hypothesis, prior = NULL, rvar_col = NULL, ...) { +bayesfactor_restricted.data.frame <- function( + posterior, + hypothesis, + prior = NULL, + rvar_col = NULL, + ... +) { x_rvar <- .possibly_extract_rvar_col(posterior, rvar_col) if (length(x_rvar) > 0L) { cl <- match.call() @@ -231,7 +252,6 @@ bayesfactor_restricted.data.frame <- function(posterior, hypothesis, prior = NUL return(eval.parent(cl)) } - p_hypothesis <- parse(text = hypothesis) if (is.null(prior)) { @@ -259,10 +279,13 @@ bayesfactor_restricted.data.frame <- function(posterior, hypothesis, prior = NUL x_logical } - posterior_l <- as.data.frame(lapply(p_hypothesis, .test_hypothesis, data = posterior)) prior_l <- as.data.frame(lapply(p_hypothesis, .test_hypothesis, data = prior)) - colnames(posterior_l) <- colnames(prior_l) <- if (is.null(names(hypothesis))) hypothesis else names(hypothesis) + colnames(posterior_l) <- colnames(prior_l) <- if (is.null(names(hypothesis))) { + hypothesis + } else { + names(hypothesis) + } posterior_p <- sapply(posterior_l, mean) prior_p <- sapply(prior_l, mean) @@ -288,7 +311,8 @@ bayesfactor_restricted.data.frame <- function(posterior, hypothesis, prior = NUL #' @export bayesfactor_restricted.draws <- function(posterior, hypothesis, prior = NULL, ...) { - bayesfactor_restricted(.posterior_draws_to_df(posterior), + bayesfactor_restricted( + .posterior_draws_to_df(posterior), hypothesis = hypothesis, prior = if (!is.null(prior)) .posterior_draws_to_df(prior), ... diff --git a/R/diagnostic_posterior.R b/R/diagnostic_posterior.R index b98c0ff01..f2e75337e 100644 --- a/R/diagnostic_posterior.R +++ b/R/diagnostic_posterior.R @@ -493,9 +493,7 @@ diagnostic_posterior.stanfit <- function( #' @export -diagnostic_posterior.CmdStanFit <- function(posterior, - diagnostic = "all", - ...) { +diagnostic_posterior.CmdStanFit <- function(posterior, diagnostic = "all", ...) { if ("all" %in% diagnostic) { diagnostic <- c("ESS", "Rhat", "MCSE") } @@ -503,10 +501,10 @@ diagnostic_posterior.CmdStanFit <- function(posterior, insight::check_if_installed("posterior") insight::check_if_installed("cmdstanr") - draws <- posterior$draws(format = "draws_df") - out <- posterior::summarize_draws(draws, + out <- posterior::summarize_draws( + draws, posterior::default_convergence_measures(), MCSE = posterior::mcse_mean ) diff --git a/R/print.R b/R/print.R index 9cf9a5345..6e1a241e7 100644 --- a/R/print.R +++ b/R/print.R @@ -1,9 +1,11 @@ #' @rdname display.describe_posterior #' @export -print.describe_posterior <- function(x, - digits = 2, - caption = "Summary of Posterior Distribution", - ...) { +print.describe_posterior <- function( + x, + digits = 2, + caption = "Summary of Posterior Distribution", + ... +) { .print_default( x = x, digits = digits, @@ -14,10 +16,7 @@ print.describe_posterior <- function(x, #' @export -print.point_estimate <- function(x, - digits = 2, - caption = "Point Estimate", - ...) { +print.point_estimate <- function(x, digits = 2, caption = "Point Estimate", ...) { .print_default( x = x, digits = digits, @@ -28,10 +27,7 @@ print.point_estimate <- function(x, #' @export -print.p_direction <- function(x, - digits = 2, - caption = "Probability of Direction", - ...) { +print.p_direction <- function(x, digits = 2, caption = "Probability of Direction", ...) { .print_default( x = x, digits = digits, @@ -42,10 +38,7 @@ print.p_direction <- function(x, #' @export -print.p_map <- function(x, - digits = 2, - caption = "MAP-based p-value", - ...) { +print.p_map <- function(x, digits = 2, caption = "MAP-based p-value", ...) { .print_default( x = x, digits = digits, @@ -56,10 +49,7 @@ print.p_map <- function(x, #' @export -print.map_estimate <- function(x, - digits = 2, - caption = "MAP Estimate", - ...) { +print.map_estimate <- function(x, digits = 2, caption = "MAP Estimate", ...) { .print_default( x = x, digits = digits, @@ -122,10 +112,12 @@ print.p_significance <- function(x, digits = 2, ...) { #' @export -print.bayestestR_hdi <- function(x, - digits = 2, - caption = "Highest Density Interval", - ...) { +print.bayestestR_hdi <- function( + x, + digits = 2, + caption = "Highest Density Interval", + ... +) { ci_string <- "HDI" if (inherits(x, "bayestestR_spi")) { caption <- "Shortest Probability Interval" @@ -143,10 +135,7 @@ print.bayestestR_hdi <- function(x, #' @export -print.bayestestR_eti <- function(x, - digits = 2, - caption = "Equal-Tailed Interval", - ...) { +print.bayestestR_eti <- function(x, digits = 2, caption = "Equal-Tailed Interval", ...) { .print_default( x = x, digits = digits, @@ -158,10 +147,7 @@ print.bayestestR_eti <- function(x, #' @export -print.bayestestR_si <- function(x, - digits = 2, - caption = "Support Interval", - ...) { +print.bayestestR_si <- function(x, digits = 2, caption = "Support Interval", ...) { .print_default( x = x, digits = digits, @@ -174,14 +160,15 @@ print.bayestestR_si <- function(x, # special handling for bayes factors ------------------ - #' @export -print.bayesfactor_models <- function(x, - digits = 3, - log = FALSE, - show_names = FALSE, - caption = "Bayes Factors for Model Comparison", - ...) { +print.bayesfactor_models <- function( + x, + digits = 3, + log = FALSE, + show_names = FALSE, + caption = "Bayes Factors for Model Comparison", + ... +) { show_names <- show_names & !attr(x, "unsupported_models") .print_bf_default( x = x, @@ -196,11 +183,13 @@ print.bayesfactor_models <- function(x, #' @export -print.bayesfactor_inclusion <- function(x, - digits = 3, - log = FALSE, - caption = "Inclusion Bayes Factors (Model Averaged)", - ...) { +print.bayesfactor_inclusion <- function( + x, + digits = 3, + log = FALSE, + caption = "Inclusion Bayes Factors (Model Averaged)", + ... +) { .print_bf_default( x = x, digits = digits, @@ -212,11 +201,13 @@ print.bayesfactor_inclusion <- function(x, #' @export -print.bayesfactor_restricted <- function(x, - digits = 3, - log = FALSE, - caption = "Bayes Factor (Order-Restriction)", - ...) { +print.bayesfactor_restricted <- function( + x, + digits = 3, + log = FALSE, + caption = "Bayes Factor (Order-Restriction)", + ... +) { .print_bf_default( x = x, digits = digits, @@ -249,12 +240,14 @@ print.bayesfactor_parameters <- function(x, digits = 3, log = FALSE, ...) { # util --------------------- -.print_default <- function(x, - digits = 2, - caption = NULL, - subtitles = NULL, - ci_string = "CI", - ...) { +.print_default <- function( + x, + digits = 2, + caption = NULL, + subtitles = NULL, + ci_string = "CI", + ... +) { # retrieve information with cleaned parameter names cp <- attr(x, "clean_parameters") @@ -271,7 +264,11 @@ print.bayesfactor_parameters <- function(x, digits = 3, log = FALSE, ...) { ) # check if we have a 1x1 data frame (i.e. a numeric input) - if (is.data.frame(formatted_table) && nrow(formatted_table) == 1 && ncol(formatted_table) == 1) { + if ( + is.data.frame(formatted_table) && + nrow(formatted_table) == 1 && + ncol(formatted_table) == 1 + ) { # print for numeric caption <- attr(formatted_table, "table_caption") @@ -297,12 +294,14 @@ print.bayesfactor_parameters <- function(x, digits = 3, log = FALSE, ...) { } -.print_bf_default <- function(x, - digits = 3, - log = FALSE, - caption = NULL, - align = NULL, - ...) { +.print_bf_default <- function( + x, + digits = 3, + log = FALSE, + caption = NULL, + align = NULL, + ... +) { # format data frame and columns formatted_table <- format( x, diff --git a/R/print_html.R b/R/print_html.R index 9d765275b..879a71c7e 100644 --- a/R/print_html.R +++ b/R/print_html.R @@ -1,6 +1,11 @@ #' @rdname display.describe_posterior #' @export -print_html.describe_posterior <- function(x, digits = 2, caption = "Summary of Posterior Distribution", ...) { +print_html.describe_posterior <- function( + x, + digits = 2, + caption = "Summary of Posterior Distribution", + ... +) { .print_html_default(x = x, digits = digits, caption = caption, ...) } @@ -18,7 +23,12 @@ print_html.map_estimate <- function(x, digits = 2, caption = "MAP Estimate", ... #' @export -print_html.p_direction <- function(x, digits = 2, caption = "Probability of Direction (pd)", ...) { +print_html.p_direction <- function( + x, + digits = 2, + caption = "Probability of Direction (pd)", + ... +) { .print_html_default(x = x, digits = digits, caption = caption, ...) } @@ -65,18 +75,34 @@ print_html.p_significance <- function(x, digits = 2, ...) { ) ci_string <- NULL } - .print_html_default(x = x, digits = digits, caption = caption, ci_string = ci_string, ...) + .print_html_default( + x = x, + digits = digits, + caption = caption, + ci_string = ci_string, + ... + ) } #' @export -print_html.bayestestR_hdi <- function(x, digits = 2, caption = "Highest Density Interval", ...) { +print_html.bayestestR_hdi <- function( + x, + digits = 2, + caption = "Highest Density Interval", + ... +) { .print_html_default(x = x, digits = digits, caption = caption, ci_string = "HDI", ...) } #' @export -print_html.bayestestR_eti <- function(x, digits = 2, caption = "Equal-Tailed Interval", ...) { +print_html.bayestestR_eti <- function( + x, + digits = 2, + caption = "Equal-Tailed Interval", + ... +) { .print_html_default(x = x, digits = digits, caption = caption, ci_string = "ETI", ...) } @@ -89,14 +115,15 @@ print_html.bayestestR_si <- function(x, digits = 2, caption = "Support Interval" # special handling for bayes factors ------------------ - #' @export -print_html.bayesfactor_models <- function(x, - digits = 3, - log = FALSE, - show_names = FALSE, - caption = "Bayes Factors for Model Comparison", - ...) { +print_html.bayesfactor_models <- function( + x, + digits = 3, + log = FALSE, + show_names = FALSE, + caption = "Bayes Factors for Model Comparison", + ... +) { .print_bf_html_default( x = x, digits = digits, @@ -110,11 +137,13 @@ print_html.bayesfactor_models <- function(x, #' @export -print_html.bayesfactor_inclusion <- function(x, - digits = 3, - log = FALSE, - caption = "Inclusion Bayes Factors (Model Averaged)", - ...) { +print_html.bayesfactor_inclusion <- function( + x, + digits = 3, + log = FALSE, + caption = "Inclusion Bayes Factors (Model Averaged)", + ... +) { .print_bf_html_default( x = x, digits = digits, @@ -127,11 +156,13 @@ print_html.bayesfactor_inclusion <- function(x, #' @export -print_html.bayesfactor_restricted <- function(x, - digits = 3, - log = FALSE, - caption = "Bayes Factor (Order-Restriction)", - ...) { +print_html.bayesfactor_restricted <- function( + x, + digits = 3, + log = FALSE, + caption = "Bayes Factor (Order-Restriction)", + ... +) { .print_bf_html_default(x = x, digits = digits, log = log, caption = caption, ...) } @@ -157,8 +188,14 @@ print_html.bayesfactor_parameters <- function(x, digits = 3, log = FALSE, ...) { # util --------------- - -.print_html_default <- function(x, digits = 2, caption = NULL, subtitles = NULL, ci_string = "CI", ...) { +.print_html_default <- function( + x, + digits = 2, + caption = NULL, + subtitles = NULL, + ci_string = "CI", + ... +) { # retrieve information with cleaned parameter names cp <- attr(x, "clean_parameters") @@ -185,13 +222,15 @@ print_html.bayesfactor_parameters <- function(x, digits = 3, log = FALSE, ...) { } -.print_bf_html_default <- function(x, - digits = 3, - log = FALSE, - show_names = NULL, - caption = NULL, - align = NULL, - ...) { +.print_bf_html_default <- function( + x, + digits = 3, + log = FALSE, + show_names = NULL, + caption = NULL, + align = NULL, + ... +) { formatted_table <- format( x, digits = digits, diff --git a/R/print_md.R b/R/print_md.R index d58e8b916..cba172a91 100644 --- a/R/print_md.R +++ b/R/print_md.R @@ -1,6 +1,11 @@ #' @rdname display.describe_posterior #' @export -print_md.describe_posterior <- function(x, digits = 2, caption = "Summary of Posterior Distribution", ...) { +print_md.describe_posterior <- function( + x, + digits = 2, + caption = "Summary of Posterior Distribution", + ... +) { .print_md_default(x = x, digits = digits, caption = caption, ...) } @@ -18,7 +23,12 @@ print_md.map_estimate <- function(x, digits = 2, caption = "MAP Estimate", ...) #' @export -print_md.p_direction <- function(x, digits = 2, caption = "Probability of Direction (pd)", ...) { +print_md.p_direction <- function( + x, + digits = 2, + caption = "Probability of Direction (pd)", + ... +) { .print_md_default(x = x, digits = digits, caption = caption, ...) } @@ -70,13 +80,23 @@ print_md.p_significance <- function(x, digits = 2, ...) { #' @export -print_md.bayestestR_hdi <- function(x, digits = 2, caption = "Highest Density Interval", ...) { +print_md.bayestestR_hdi <- function( + x, + digits = 2, + caption = "Highest Density Interval", + ... +) { .print_md_default(x = x, digits = digits, caption = caption, ci_string = "HDI", ...) } #' @export -print_md.bayestestR_eti <- function(x, digits = 2, caption = "Equal-Tailed Interval", ...) { +print_md.bayestestR_eti <- function( + x, + digits = 2, + caption = "Equal-Tailed Interval", + ... +) { .print_md_default(x = x, digits = digits, caption = caption, ci_string = "ETI", ...) } @@ -89,14 +109,15 @@ print_md.bayestestR_si <- function(x, digits = 2, caption = "Support Interval", # special handling for bayes factors ------------------ - #' @export -print_md.bayesfactor_models <- function(x, - digits = 3, - log = FALSE, - show_names = FALSE, - caption = "Bayes Factors for Model Comparison", - ...) { +print_md.bayesfactor_models <- function( + x, + digits = 3, + log = FALSE, + show_names = FALSE, + caption = "Bayes Factors for Model Comparison", + ... +) { .print_bf_md_default( x = x, digits = digits, @@ -110,11 +131,13 @@ print_md.bayesfactor_models <- function(x, #' @export -print_md.bayesfactor_inclusion <- function(x, - digits = 3, - log = FALSE, - caption = "Inclusion Bayes Factors (Model Averaged)", - ...) { +print_md.bayesfactor_inclusion <- function( + x, + digits = 3, + log = FALSE, + caption = "Inclusion Bayes Factors (Model Averaged)", + ... +) { .print_bf_md_default( x = x, digits = digits, @@ -127,11 +150,13 @@ print_md.bayesfactor_inclusion <- function(x, #' @export -print_md.bayesfactor_restricted <- function(x, - digits = 3, - log = FALSE, - caption = "Bayes Factor (Order-Restriction)", - ...) { +print_md.bayesfactor_restricted <- function( + x, + digits = 3, + log = FALSE, + caption = "Bayes Factor (Order-Restriction)", + ... +) { .print_bf_md_default(x = x, digits = digits, log = log, caption = caption, ...) } @@ -157,8 +182,14 @@ print_md.bayesfactor_parameters <- function(x, digits = 3, log = FALSE, ...) { # util --------------- - -.print_md_default <- function(x, digits = 2, caption = NULL, subtitles = NULL, ci_string = "CI", ...) { +.print_md_default <- function( + x, + digits = 2, + caption = NULL, + subtitles = NULL, + ci_string = "CI", + ... +) { # retrieve information with cleaned parameter names cp <- attr(x, "clean_parameters") @@ -184,13 +215,15 @@ print_md.bayesfactor_parameters <- function(x, digits = 3, log = FALSE, ...) { } -.print_bf_md_default <- function(x, - digits = 3, - log = FALSE, - show_names = NULL, - caption = NULL, - align = NULL, - ...) { +.print_bf_md_default <- function( + x, + digits = 3, + log = FALSE, + show_names = NULL, + caption = NULL, + align = NULL, + ... +) { formatted_table <- format( x, digits = digits, diff --git a/R/si.R b/R/si.R index e9f50b140..2efefc4ba 100644 --- a/R/si.R +++ b/R/si.R @@ -104,8 +104,11 @@ si.numeric <- function(posterior, prior = NULL, BF = 1, verbose = TRUE, ...) { # Get SIs out <- si.data.frame( - posterior = posterior, prior = prior, - BF = BF, verbose = verbose, ... + posterior = posterior, + prior = prior, + BF = BF, + verbose = verbose, + ... ) out$Parameter <- NULL out @@ -114,23 +117,34 @@ si.numeric <- function(posterior, prior = NULL, BF = 1, verbose = TRUE, ...) { #' @rdname si #' @export -si.stanreg <- function(posterior, prior = NULL, - BF = 1, verbose = TRUE, - effects = "fixed", - component = "location", - parameters = NULL, - ...) { +si.stanreg <- function( + posterior, + prior = NULL, + BF = 1, + verbose = TRUE, + effects = "fixed", + component = "location", + parameters = NULL, + ... +) { cleaned_parameters <- .get_cleaned_parameters(posterior, ...) - samps <- .clean_priors_and_posteriors(posterior, prior, - effects = effects, component = component, - parameters = parameters, verbose = verbose + samps <- .clean_priors_and_posteriors( + posterior, + prior, + effects = effects, + component = component, + parameters = parameters, + verbose = verbose ) # Get SIs temp <- si.data.frame( - posterior = samps$posterior, prior = samps$prior, - BF = BF, verbose = verbose, ... + posterior = samps$posterior, + prior = samps$prior, + BF = BF, + verbose = verbose, + ... ) out <- .prepare_output(temp, cleaned_parameters, inherits(posterior, "stanmvreg")) @@ -151,14 +165,16 @@ si.blavaan <- si.stanreg #' @export -si.emmGrid <- function(posterior, prior = NULL, - BF = 1, verbose = TRUE, ...) { +si.emmGrid <- function(posterior, prior = NULL, BF = 1, verbose = TRUE, ...) { samps <- .clean_priors_and_posteriors(posterior, prior, verbose = verbose) # Get SIs out <- si.data.frame( - posterior = samps$posterior, prior = samps$prior, - BF = BF, verbose = verbose, ... + posterior = samps$posterior, + prior = samps$prior, + BF = BF, + verbose = verbose, + ... ) out <- .append_datagrid(out, posterior, long = length(BF) > 1L) @@ -181,9 +197,19 @@ si.predictions <- si.emmGrid #' @export -si.stanfit <- function(posterior, prior = NULL, BF = 1, verbose = TRUE, effects = "fixed", ...) { - out <- si(insight::get_parameters(posterior, effects = effects, verbose = verbose), - prior = prior, BF = BF, verbose = verbose +si.stanfit <- function( + posterior, + prior = NULL, + BF = 1, + verbose = TRUE, + effects = "fixed", + ... +) { + out <- si( + insight::get_parameters(posterior, effects = effects, verbose = verbose), + prior = prior, + BF = BF, + verbose = verbose ) attr(out, "object_name") <- insight::safe_deparse_symbol(substitute(posterior)) out @@ -192,7 +218,14 @@ si.stanfit <- function(posterior, prior = NULL, BF = 1, verbose = TRUE, effects #' @rdname si #' @export -si.get_predicted <- function(posterior, prior = NULL, BF = 1, use_iterations = FALSE, verbose = TRUE, ...) { +si.get_predicted <- function( + posterior, + prior = NULL, + BF = 1, + use_iterations = FALSE, + verbose = TRUE, + ... +) { if (isTRUE(use_iterations)) { if ("iterations" %in% names(attributes(posterior))) { out <- si( @@ -207,7 +240,13 @@ si.get_predicted <- function(posterior, prior = NULL, BF = 1, use_iterations = F } attr(out, "object_name") <- insight::safe_deparse_symbol(substitute(posterior)) } else { - out <- si(insight::get_parameters(posterior), prior = prior, BF = BF, verbose = verbose, ...) + out <- si( + insight::get_parameters(posterior), + prior = prior, + BF = BF, + verbose = verbose, + ... + ) } out } @@ -216,7 +255,14 @@ si.get_predicted <- function(posterior, prior = NULL, BF = 1, use_iterations = F #' @rdname si #' @inheritParams p_direction #' @export -si.data.frame <- function(posterior, prior = NULL, BF = 1, rvar_col = NULL, verbose = TRUE, ...) { +si.data.frame <- function( + posterior, + prior = NULL, + BF = 1, + rvar_col = NULL, + verbose = TRUE, + ... +) { x_rvar <- .possibly_extract_rvar_col(posterior, rvar_col) if (length(x_rvar) > 0L) { cl <- match.call() @@ -259,7 +305,13 @@ si.data.frame <- function(posterior, prior = NULL, BF = 1, rvar_col = NULL, verb attr(out, "ci_method") <- "SI" attr(out, "ci") <- BF attr(out, "plot_data") <- .make_BF_plot_data(posterior, prior, 0, 0, ...)$plot_data - class(out) <- unique(c("bayestestR_si", "see_si", "bayestestR_ci", "see_ci", class(out))) + class(out) <- unique(c( + "bayestestR_si", + "see_si", + "bayestestR_ci", + "see_ci", + class(out) + )) out } @@ -267,9 +319,12 @@ si.data.frame <- function(posterior, prior = NULL, BF = 1, rvar_col = NULL, verb #' @export si.draws <- function(posterior, prior = NULL, BF = 1, verbose = TRUE, ...) { - si(.posterior_draws_to_df(posterior), + si( + .posterior_draws_to_df(posterior), prior = if (!is.null(prior)) .posterior_draws_to_df(prior), - BF = BF, verbose = verbose, ... + BF = BF, + verbose = verbose, + ... ) } @@ -282,12 +337,7 @@ si.rvar <- si.draws .si.data.frame <- function(posterior, prior, BF, verbose = TRUE, ...) { sis <- matrix(NA, nrow = ncol(posterior), ncol = 2) for (par in seq_along(posterior)) { - sis[par, ] <- .si(posterior[[par]], - prior[[par]], - BF = BF, - verbose = verbose, - ... - ) + sis[par, ] <- .si(posterior[[par]], prior[[par]], BF = BF, verbose = verbose, ...) } data.frame( @@ -301,7 +351,15 @@ si.rvar <- si.draws #' @keywords internal -.si <- function(posterior, prior, BF = 1, extend_scale = 0.05, precision = 2^8, verbose = TRUE, ...) { +.si <- function( + posterior, + prior, + BF = 1, + extend_scale = 0.05, + precision = 2^8, + verbose = TRUE, + ... +) { insight::check_if_installed("logspline") if (isTRUE(all.equal(prior, posterior))) { diff --git a/R/spi.R b/R/spi.R index 7a67bf4bc..91b511a18 100644 --- a/R/spi.R +++ b/R/spi.R @@ -49,17 +49,31 @@ spi <- function(x, ...) { #' @export spi.default <- function(x, ...) { - insight::format_error(paste0("'spi()' is not yet implemented for objects of class '", class(x)[1], "'.")) + insight::format_error(paste0( + "'spi()' is not yet implemented for objects of class '", + class(x)[1], + "'." + )) } #' @rdname spi #' @export spi.numeric <- function(x, ci = 0.95, verbose = TRUE, ...) { - out <- do.call(rbind, lapply(ci, function(i) { - .spi(x = x, ci = i, verbose = verbose) - })) - class(out) <- unique(c("bayestestR_hdi", "see_hdi", "bayestestR_ci", "see_ci", "bayestestR_spi", class(out))) + out <- do.call( + rbind, + lapply(ci, function(i) { + .spi(x = x, ci = i, verbose = verbose) + }) + ) + class(out) <- unique(c( + "bayestestR_hdi", + "see_hdi", + "bayestestR_ci", + "see_ci", + "bayestestR_spi", + class(out) + )) attr(out, "data") <- x out } @@ -91,7 +105,12 @@ spi.data.frame <- function(x, ci = 0.95, rvar_col = NULL, verbose = TRUE, ...) { #' @export spi.draws <- function(x, ci = 0.95, verbose = TRUE, ...) { - dat <- .compute_interval_dataframe(x = .posterior_draws_to_df(x), ci = ci, verbose = verbose, fun = "spi") + dat <- .compute_interval_dataframe( + x = .posterior_draws_to_df(x), + ci = ci, + verbose = verbose, + fun = "spi" + ) attr(dat, "object_name") <- insight::safe_deparse_symbol(substitute(x)) dat } @@ -107,11 +126,7 @@ spi.MCMCglmm <- function(x, ci = 0.95, verbose = TRUE, ...) { #' @export -spi.bamlss <- function(x, - ci = 0.95, - component = "all", - verbose = TRUE, - ...) { +spi.bamlss <- function(x, ci = 0.95, component = "all", verbose = TRUE, ...) { hdi(x, ci = ci, component = component, verbose = verbose, ci_method = "spi") } @@ -137,12 +152,14 @@ spi.mcmc.list <- spi.mcmc spi.BGGM <- spi.mcmc #' @export -spi.sim.merMod <- function(x, - ci = 0.95, - effects = "fixed", - parameters = NULL, - verbose = TRUE, - ...) { +spi.sim.merMod <- function( + x, + ci = 0.95, + effects = "fixed", + parameters = NULL, + verbose = TRUE, + ... +) { hdi( x, ci = ci, @@ -188,13 +205,15 @@ spi.predictions <- spi.slopes #' @export -spi.stanreg <- function(x, - ci = 0.95, - effects = "fixed", - component = "location", - parameters = NULL, - verbose = TRUE, - ...) { +spi.stanreg <- function( + x, + ci = 0.95, + effects = "fixed", + component = "location", + parameters = NULL, + verbose = TRUE, + ... +) { cleaned_parameters <- .get_cleaned_parameters(x, ...) out <- .prepare_output( @@ -232,13 +251,15 @@ spi.blavaan <- spi.stanreg #' @rdname spi #' @export -spi.brmsfit <- function(x, - ci = 0.95, - effects = "fixed", - component = "conditional", - parameters = NULL, - verbose = TRUE, - ...) { +spi.brmsfit <- function( + x, + ci = 0.95, + effects = "fixed", + component = "conditional", + parameters = NULL, + verbose = TRUE, + ... +) { cleaned_parameters <- insight::clean_parameters(x) out <- .prepare_output( @@ -277,7 +298,12 @@ spi.BFBayesFactor <- function(x, ci = 0.95, verbose = TRUE, ...) { spi.get_predicted <- function(x, ci = 0.95, use_iterations = FALSE, verbose = TRUE, ...) { if (isTRUE(use_iterations)) { if ("iterations" %in% names(attributes(x))) { - out <- spi(as.data.frame(t(attributes(x)$iterations)), ci = ci, verbose = verbose, ...) + out <- spi( + as.data.frame(t(attributes(x)$iterations)), + ci = ci, + verbose = verbose, + ... + ) } else { insight::format_error("No iterations present in the output.") } @@ -323,7 +349,6 @@ spi.get_predicted <- function(x, ci = 0.95, use_iterations = FALSE, verbose = TR k <- which(x == l)[1] ui <- which(x == u)[1] - # lower bound if (!anyNA(k) && all(k == 1)) { x.l <- l @@ -332,7 +357,14 @@ spi.get_predicted <- function(x, ci = 0.95, use_iterations = FALSE, verbose = TR frac <- 1 while (is.null(x.l)) { frac <- frac - 0.1 - x.l <- .safe(.spi_lower(bw = frac * bw, n.sims = n.sims, k = k, l = l, dens = dens, x = x)) + x.l <- .safe(.spi_lower( + bw = frac * bw, + n.sims = n.sims, + k = k, + l = l, + dens = dens, + x = x + )) if (frac <= 0.1) { insight::format_alert("Could not find a solution for the SPI lower bound.") x.l <- NA @@ -348,7 +380,14 @@ spi.get_predicted <- function(x, ci = 0.95, use_iterations = FALSE, verbose = TR frac <- 1 while (is.null(x.u)) { frac <- frac - 0.1 - x.u <- .safe(.spi_upper(bw = frac * bw, n.sims = n.sims, ui = ui, u = u, dens = dens, x = x)) + x.u <- .safe(.spi_upper( + bw = frac * bw, + n.sims = n.sims, + ui = ui, + u = u, + dens = dens, + x = x + )) if (frac <= 0.1) { insight::format_alert("Could not find a solution for the SPI upper bound.") x.u <- NA diff --git a/R/utils_bayesfactor.R b/R/utils_bayesfactor.R index a04aedf10..c73b31cdf 100644 --- a/R/utils_bayesfactor.R +++ b/R/utils_bayesfactor.R @@ -6,20 +6,18 @@ } #' @keywords internal -.clean_priors_and_posteriors.stanreg <- function(posterior, prior, - verbose = TRUE, - ...) { +.clean_priors_and_posteriors.stanreg <- function(posterior, prior, verbose = TRUE, ...) { # Get Priors if (is.null(prior)) { prior <- posterior } - prior <- try(unupdate(prior, verbose = verbose), silent = TRUE) if (methods::is(prior, "try-error")) { if (grepl("flat priors", prior, fixed = TRUE)) { prior <- paste0( - prior, "Could not therefore compute Bayes factors, as these inform about ", + prior, + "Could not therefore compute Bayes factors, as these inform about ", "the raltive likelihood of two 'hypotheses', and flat priors provide no ", "likelihood.\n", "See '?bayesfactor_parameters' for more information.\n" @@ -42,8 +40,7 @@ #' @keywords internal -.clean_priors_and_posteriors.blavaan <- function(posterior, prior, - verbose = TRUE, ...) { +.clean_priors_and_posteriors.blavaan <- function(posterior, prior, verbose = TRUE, ...) { # Get Priors if (is.null(prior)) { prior <- posterior @@ -62,20 +59,18 @@ #' @keywords internal -.clean_priors_and_posteriors.emmGrid <- function(posterior, - prior, - verbose = TRUE, - ...) { +.clean_priors_and_posteriors.emmGrid <- function(posterior, prior, verbose = TRUE, ...) { insight::check_if_installed("emmeans") if (is.null(prior)) { prior <- posterior if (verbose) { - insight::format_warning("Prior not specified! Please provide the original model to get meaningful results.") + insight::format_warning( + "Prior not specified! Please provide the original model to get meaningful results." + ) } } - if (!inherits(prior, "emmGrid")) { # then is it a model on.exit( @@ -93,13 +88,13 @@ insight::format_error("Cannot rebuild prior emmGrid from a brmsfit model.") } - prior <- try(unupdate(prior, verbose = verbose), silent = TRUE) if (inherits(prior, "try-error")) { on.exit() # undo general error message if (grepl("flat priors", prior, fixed = TRUE)) { prior <- paste0( - prior, "Could not therefore compute Bayes factors, as these inform about ", + prior, + "Could not therefore compute Bayes factors, as these inform about ", "the raltive likelihood of two 'hypotheses', and flat priors provide no ", "likelihood.\n", "See '?bayesfactor_parameters' for more information.\n" @@ -127,12 +122,13 @@ ) } -.clean_priors_and_posteriors.emm_list <- function(posterior, prior, - verbose = TRUE, ...) { +.clean_priors_and_posteriors.emm_list <- function(posterior, prior, verbose = TRUE, ...) { if (is.null(prior)) { prior <- posterior if (verbose) { - insight::format_warning("Prior not specified! Please provide the original model to get meaningful results.") + insight::format_warning( + "Prior not specified! Please provide the original model to get meaningful results." + ) } } @@ -147,7 +143,8 @@ if (inherits(prior, "try-error")) { if (grepl("flat priors", prior, fixed = TRUE)) { prior <- paste0( - prior, "Could not therefore compute Bayes factors, as these inform about ", + prior, + "Could not therefore compute Bayes factors, as these inform about ", "the raltive likelihood of two 'hypotheses', and flat priors provide no ", "likelihood.\n", "See '?bayesfactor_parameters' for more information.\n" @@ -172,19 +169,19 @@ posterior <- do.call("cbind", lapply(res, "[[", "posterior")) prior <- do.call("cbind", lapply(res, "[[", "prior")) - list( posterior = posterior, prior = prior ) } -.clean_priors_and_posteriors.slopes <- function(posterior, prior, - verbose = TRUE, ...) { +.clean_priors_and_posteriors.slopes <- function(posterior, prior, verbose = TRUE, ...) { if (is.null(prior)) { prior <- posterior if (verbose) { - insight::format_warning("Prior not specified! Please provide the original model to get meaningful results.") + insight::format_warning( + "Prior not specified! Please provide the original model to get meaningful results." + ) } } @@ -201,9 +198,7 @@ .clean_priors_and_posteriors.comparisons <- .clean_priors_and_posteriors.slopes -.clean_priors_and_posteriors.stanfit <- function(posterior, prior, - verbose = TRUE, - ...) { +.clean_priors_and_posteriors.stanfit <- function(posterior, prior, verbose = TRUE, ...) { posterior <- insight::get_parameters(posterior) # Get Priors @@ -270,7 +265,9 @@ get_parameters.CmdStanFit <- function(x, ...) { tmp_terms <- .make_terms(df.model$Modelnames[m]) if (length(tmp_terms) > 0) { missing_terms <- !tmp_terms %in% colnames(df.model) - if (any(missing_terms)) df.model[, tmp_terms[missing_terms]] <- NA + if (any(missing_terms)) { + df.model[, tmp_terms[missing_terms]] <- NA + } df.model[m, tmp_terms] <- TRUE } } @@ -316,7 +313,11 @@ get_parameters.CmdStanFit <- function(x, ...) { tmp_trms <- attr(stats::terms.formula(tmp_random[[i]]), "term.labels") tmp_trms <- sapply(tmp_trms, sort_interactions) - if (!any(unlist(strsplit(as.character(tmp_random[[i]])[[2]], " + ", fixed = TRUE)) == "0")) { + if ( + !any( + unlist(strsplit(as.character(tmp_random[[i]])[[2]], " + ", fixed = TRUE)) == "0" + ) + ) { tmp_trms <- c("1", tmp_trms) } @@ -329,13 +330,15 @@ get_parameters.CmdStanFit <- function(x, ...) { # make_BF_plot_data ------------------------------------------------------- #' @keywords internal -.make_BF_plot_data <- function(posterior, - prior, - direction, - null, - extend_scale = 0.05, - precision = 2^8, - ...) { +.make_BF_plot_data <- function( + posterior, + prior, + direction, + null, + extend_scale = 0.05, + precision = 2^8, + ... +) { insight::check_if_installed("logspline") estimate_samples_density <- function(samples) { diff --git a/R/utils_check_collinearity.R b/R/utils_check_collinearity.R index 7710ecd51..12a61d3b7 100644 --- a/R/utils_check_collinearity.R +++ b/R/utils_check_collinearity.R @@ -1,7 +1,10 @@ #' @keywords internal -.check_multicollinearity <- function(model, - method = "equivalence_test", - threshold = 0.7, ...) { +.check_multicollinearity <- function( + model, + method = "equivalence_test", + threshold = 0.7, + ... +) { if (inherits(model, "CmdStanFit")) { return() } @@ -30,7 +33,9 @@ results <- cbind( parameter, corr = abs(as.vector(expand.grid(parameter_correlation)[[1]])), - pvalue = apply(parameter, 1, function(r) stats::cor.test(dat[[r[1]]], dat[[r[2]]])$p.value) + pvalue = apply(parameter, 1, function(r) { + stats::cor.test(dat[[r[1]]], dat[[r[2]]])$p.value + }) ) # Filter @@ -52,7 +57,11 @@ threshold <- pmin(threshold, 0.9) results <- results[results$corr > threshold & results$corr <= 0.9, ] if (nrow(results) > 0) { - where <- paste0("between ", toString(paste0(results$where, " (r = ", round(results$corr, 2), ")")), "") + where <- paste0( + "between ", + toString(paste0(results$where, " (r = ", round(results$corr, 2), ")")), + "" + ) insight::format_alert(paste0( "Possible multicollinearity ", where, @@ -65,7 +74,11 @@ # Filter by second threshold results <- results[results$corr > 0.9, ] if (nrow(results) > 0) { - where <- paste0("between ", toString(paste0(results$where, " (r = ", round(results$corr, 2), ")")), "") + where <- paste0( + "between ", + toString(paste0(results$where, " (r = ", round(results$corr, 2), ")")), + "" + ) insight::format_alert(paste0( "Probable multicollinearity ", where, diff --git a/R/weighted_posteriors.R b/R/weighted_posteriors.R index dd274550a..82a97e8a0 100644 --- a/R/weighted_posteriors.R +++ b/R/weighted_posteriors.R @@ -133,7 +133,12 @@ weighted_posteriors <- function(..., prior_odds = NULL, missing = 0, verbose = T #' @export #' @rdname weighted_posteriors -weighted_posteriors.data.frame <- function(..., prior_odds = NULL, missing = 0, verbose = TRUE) { +weighted_posteriors.data.frame <- function( + ..., + prior_odds = NULL, + missing = 0, + verbose = TRUE +) { Mods <- list(...) mnames <- sapply(match.call(expand.dots = FALSE)$`...`, insight::safe_deparse) @@ -163,13 +168,15 @@ weighted_posteriors.data.frame <- function(..., prior_odds = NULL, missing = 0, #' @export #' @rdname weighted_posteriors -weighted_posteriors.stanreg <- function(..., - prior_odds = NULL, - missing = 0, - verbose = TRUE, - effects = "fixed", - component = "conditional", - parameters = NULL) { +weighted_posteriors.stanreg <- function( + ..., + prior_odds = NULL, + missing = 0, + verbose = TRUE, + effects = "fixed", + component = "conditional", + parameters = NULL +) { Mods <- list(...) mnames <- sapply(match.call(expand.dots = FALSE)$`...`, insight::safe_deparse) @@ -185,7 +192,9 @@ weighted_posteriors.stanreg <- function(..., weighted_samps <- round(iterations * postProbs) # extract parameters - params <- lapply(Mods, insight::get_parameters, + params <- lapply( + Mods, + insight::get_parameters, effects = effects, component = component, parameters = parameters @@ -202,11 +211,13 @@ weighted_posteriors.blavaan <- weighted_posteriors.stanreg #' @rdname weighted_posteriors #' @export -weighted_posteriors.BFBayesFactor <- function(..., - prior_odds = NULL, - missing = 0, - verbose = TRUE, - iterations = 4000) { +weighted_posteriors.BFBayesFactor <- function( + ..., + prior_odds = NULL, + missing = 0, + verbose = TRUE, + iterations = 4000 +) { Mods <- c(...) # Get Bayes factors @@ -237,11 +248,16 @@ weighted_posteriors.BFBayesFactor <- function(..., ) } else if (m == 1) { # If the model is the "den" model - params[[m]] <- BayesFactor::posterior(1 / Mods[1], iterations = iterations, progress = FALSE) + params[[m]] <- BayesFactor::posterior( + 1 / Mods[1], + iterations = iterations, + progress = FALSE + ) } else { params[[m]] <- BayesFactor::posterior( Mods[m - 1], - iterations = iterations, progress = FALSE + iterations = iterations, + progress = FALSE ) } } @@ -286,6 +302,8 @@ weighted_posteriors.BFBayesFactor <- function(..., #' @keywords internal .total_samps <- function(mod) { x <- insight::find_algorithm(mod) - if (is.null(x$iterations)) x$iterations <- x$sample + if (is.null(x$iterations)) { + x$iterations <- x$sample + } x$chains * (x$iterations - x$warmup) } diff --git a/man/bayesfactor_parameters.Rd b/man/bayesfactor_parameters.Rd index 3524a3824..708360ca5 100644 --- a/man/bayesfactor_parameters.Rd +++ b/man/bayesfactor_parameters.Rd @@ -12,14 +12,7 @@ \alias{bayesfactor_parameters.data.frame} \title{Bayes Factors (BF) for a Single Parameter} \usage{ -bayesfactor_parameters( - posterior, - prior = NULL, - direction = "two-sided", - null = 0, - ..., - verbose = TRUE -) +bayesfactor_parameters(posterior, ..., verbose = TRUE) bayesfactor_pointnull( posterior, @@ -39,14 +32,7 @@ bayesfactor_rope( verbose = TRUE ) -bf_parameters( - posterior, - prior = NULL, - direction = "two-sided", - null = 0, - ..., - verbose = TRUE -) +bf_parameters(posterior, ..., verbose = TRUE) bf_pointnull( posterior, @@ -102,6 +88,11 @@ bf_rope( \code{emmGrid} or a data frame - representing a posterior distribution(s) from (see 'Details').} +\item{...}{Arguments passed to and from other methods. (Can be used to pass +arguments to internal \code{\link[logspline:logspline]{logspline::logspline()}}.)} + +\item{verbose}{Toggle off warnings.} + \item{prior}{An object representing a prior distribution (see 'Details').} \item{direction}{Test type (see 'Details'). One of \code{0}, @@ -111,11 +102,6 @@ tailed) or \code{1}, \code{"right"} (right tailed).} \item{null}{Value of the null, either a scalar (for point-null) or a range (for a interval-null).} -\item{...}{Arguments passed to and from other methods. (Can be used to pass -arguments to internal \code{\link[logspline:logspline]{logspline::logspline()}}.)} - -\item{verbose}{Toggle off warnings.} - \item{effects}{Should variables for fixed effects (\code{"fixed"}), random effects (\code{"random"}) or both (\code{"all"}) be returned? Only applies to mixed models. May be abbreviated. diff --git a/man/contr.equalprior.Rd b/man/contr.equalprior.Rd index 7a284f0d0..916dd1456 100644 --- a/man/contr.equalprior.Rd +++ b/man/contr.equalprior.Rd @@ -21,7 +21,7 @@ contr.equalprior_deviations(n, contrasts = TRUE, sparse = FALSE) computed.} \item{sparse}{logical indicating if the result should be sparse - (of class \code{\link[Matrix:dgCMatrix-class]{dgCMatrix}}), using + (of class \code{\linkS4class[Matrix]{dgCMatrix}}), using package \href{https://CRAN.R-project.org/package=Matrix}{\pkg{Matrix}}.} } \value{ diff --git a/tests/testthat/test-bayesfactor_models.R b/tests/testthat/test-bayesfactor_models.R index 6c05b0731..d1ecbbba2 100644 --- a/tests/testthat/test-bayesfactor_models.R +++ b/tests/testthat/test-bayesfactor_models.R @@ -6,9 +6,18 @@ test_that("bayesfactor_models BIC", { mo1 <- lme4::lmer(Sepal.Length ~ (1 | Species), data = iris) mo2 <- lme4::lmer(Sepal.Length ~ Petal.Length + (1 | Species), data = iris) mo3 <- lme4::lmer(Sepal.Length ~ Petal.Length + (Petal.Length | Species), data = iris) - mo4 <- lme4::lmer(Sepal.Length ~ Petal.Length + Petal.Width + (Petal.Length | Species), data = iris) - mo5 <- lme4::lmer(Sepal.Length ~ Petal.Length * Petal.Width + (Petal.Length | Species), data = iris) - mo4_e <- lme4::lmer(Sepal.Length ~ Petal.Length + Petal.Width + (Petal.Length | Species), data = iris[-1, ]) + mo4 <- lme4::lmer( + Sepal.Length ~ Petal.Length + Petal.Width + (Petal.Length | Species), + data = iris + ) + mo5 <- lme4::lmer( + Sepal.Length ~ Petal.Length * Petal.Width + (Petal.Length | Species), + data = iris + ) + mo4_e <- lme4::lmer( + Sepal.Length ~ Petal.Length + Petal.Width + (Petal.Length | Species), + data = iris[-1, ] + ) })) # both uses of denominator @@ -22,7 +31,8 @@ test_that("bayesfactor_models BIC", { expect_equal( BFM1, bayesfactor_models(list(mo2 = mo2, mo3 = mo3, mo4 = mo4, mo1 = mo1), denominator = 4), - tolerance = 1e-4, ignore_attr = TRUE + tolerance = 1e-4, + ignore_attr = TRUE ) # only on same data! @@ -32,7 +42,8 @@ test_that("bayesfactor_models BIC", { expect_equal(update(BFM2, subset = c(1, 2))$log_BF, c(1, 57.3, 54.52), tolerance = 0.1) # update reference - expect_equal(update(BFM2, reference = 1)$log_BF, + expect_equal( + update(BFM2, reference = 1)$log_BF, c(0, -2.8, -6.2, -57.4), tolerance = 0.1 ) @@ -117,7 +128,6 @@ test_that("bayesfactor_models STAN", { diagnostic_file = file.path(tempdir(), "df1.csv") )) - set.seed(333) # compare against bridgesampling bridge_BF <- bridgesampling::bayes_factor( bridgesampling::bridge_sampler(stan_bf_1, silent = TRUE), @@ -209,7 +219,8 @@ test_that("bayesfactor_inclusion | BayesFactor", { expect_equal( bayesfactor_inclusion(BF_ToothGrowth), bayesfactor_inclusion(bayesfactor_models(BF_ToothGrowth)), - tolerance = 1e-4, ignore_attr = TRUE + tolerance = 1e-4, + ignore_attr = TRUE ) }) @@ -228,6 +239,14 @@ test_that("bayesfactor_inclusion | LMM", { # plus match_models bfinc_matched <- bayesfactor_inclusion(BFM4, match_models = TRUE) expect_equal(bfinc_matched$p_prior, c(1, 0.2, 0.6, 0.2, 0.2), tolerance = 0.1) - expect_equal(bfinc_matched$p_posterior, c(1, 0.875, 0.125, 0.009, 0.002), tolerance = 0.1) - expect_equal(bfinc_matched$log_BF, c(NaN, 58.904, -3.045, -3.573, -1.493), tolerance = 0.1) + expect_equal( + bfinc_matched$p_posterior, + c(1, 0.875, 0.125, 0.009, 0.002), + tolerance = 0.1 + ) + expect_equal( + bfinc_matched$log_BF, + c(NaN, 58.904, -3.045, -3.573, -1.493), + tolerance = 0.1 + ) }) From 7b8d37b9f2602cd0f0c37630970859140c858a38 Mon Sep 17 00:00:00 2001 From: "Mattan S. Ben-Shachar" Date: Sat, 9 May 2026 00:22:11 +0300 Subject: [PATCH 25/36] roxygen --- man/bayesfactor_parameters.Rd | 28 +++++++++++++++++++++------- man/contr.equalprior.Rd | 2 +- 2 files changed, 22 insertions(+), 8 deletions(-) diff --git a/man/bayesfactor_parameters.Rd b/man/bayesfactor_parameters.Rd index 708360ca5..3524a3824 100644 --- a/man/bayesfactor_parameters.Rd +++ b/man/bayesfactor_parameters.Rd @@ -12,7 +12,14 @@ \alias{bayesfactor_parameters.data.frame} \title{Bayes Factors (BF) for a Single Parameter} \usage{ -bayesfactor_parameters(posterior, ..., verbose = TRUE) +bayesfactor_parameters( + posterior, + prior = NULL, + direction = "two-sided", + null = 0, + ..., + verbose = TRUE +) bayesfactor_pointnull( posterior, @@ -32,7 +39,14 @@ bayesfactor_rope( verbose = TRUE ) -bf_parameters(posterior, ..., verbose = TRUE) +bf_parameters( + posterior, + prior = NULL, + direction = "two-sided", + null = 0, + ..., + verbose = TRUE +) bf_pointnull( posterior, @@ -88,11 +102,6 @@ bf_rope( \code{emmGrid} or a data frame - representing a posterior distribution(s) from (see 'Details').} -\item{...}{Arguments passed to and from other methods. (Can be used to pass -arguments to internal \code{\link[logspline:logspline]{logspline::logspline()}}.)} - -\item{verbose}{Toggle off warnings.} - \item{prior}{An object representing a prior distribution (see 'Details').} \item{direction}{Test type (see 'Details'). One of \code{0}, @@ -102,6 +111,11 @@ tailed) or \code{1}, \code{"right"} (right tailed).} \item{null}{Value of the null, either a scalar (for point-null) or a range (for a interval-null).} +\item{...}{Arguments passed to and from other methods. (Can be used to pass +arguments to internal \code{\link[logspline:logspline]{logspline::logspline()}}.)} + +\item{verbose}{Toggle off warnings.} + \item{effects}{Should variables for fixed effects (\code{"fixed"}), random effects (\code{"random"}) or both (\code{"all"}) be returned? Only applies to mixed models. May be abbreviated. diff --git a/man/contr.equalprior.Rd b/man/contr.equalprior.Rd index 916dd1456..7a284f0d0 100644 --- a/man/contr.equalprior.Rd +++ b/man/contr.equalprior.Rd @@ -21,7 +21,7 @@ contr.equalprior_deviations(n, contrasts = TRUE, sparse = FALSE) computed.} \item{sparse}{logical indicating if the result should be sparse - (of class \code{\linkS4class[Matrix]{dgCMatrix}}), using + (of class \code{\link[Matrix:dgCMatrix-class]{dgCMatrix}}), using package \href{https://CRAN.R-project.org/package=Matrix}{\pkg{Matrix}}.} } \value{ From 37d4976ab41ed26cce400139482152048e79f4b4 Mon Sep 17 00:00:00 2001 From: "Mattan S. Ben-Shachar" Date: Sat, 9 May 2026 00:22:38 +0300 Subject: [PATCH 26/36] new matrix method for bf_res --- NAMESPACE | 1 + R/bayesfactor_restricted.R | 29 +++++++++++++++++++++++++++++ man/bayesfactor_restricted.Rd | 9 +++++++++ 3 files changed, 39 insertions(+) diff --git a/NAMESPACE b/NAMESPACE index 9209b8452..174199013 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -54,6 +54,7 @@ S3method(bayesfactor_restricted,data.frame) S3method(bayesfactor_restricted,draws) S3method(bayesfactor_restricted,emmGrid) S3method(bayesfactor_restricted,emm_list) +S3method(bayesfactor_restricted,matrix) S3method(bayesfactor_restricted,predictions) S3method(bayesfactor_restricted,rvar) S3method(bayesfactor_restricted,slopes) diff --git a/R/bayesfactor_restricted.R b/R/bayesfactor_restricted.R index 32fbb31c9..4ecbe30a0 100644 --- a/R/bayesfactor_restricted.R +++ b/R/bayesfactor_restricted.R @@ -217,6 +217,35 @@ bayesfactor_restricted.emmGrid <- function( ) } +#' @rdname bayesfactor_restricted +#' @export +bayesfactor_restricted.matrix <- function( + posterior, + hypothesis, + prior = NULL, + verbose = TRUE, + ... +) { + if (is.null(prior)) { + prior <- posterior + insight::format_warning( + "Prior not specified! ", + "Please specify priors (with column names matching 'posterior')", + " to get meaningful results." + ) + } + + if (is.null(colnames(posterior)) || is.null(colnames(prior))) { + insight::format_error("Posterior / Prior must have column names.") + } + + bayesfactor_restricted.data.frame( + posterior = as.data.frame(posterior), + prior = as.data.frame(prior), + hypothesis = hypothesis + ) +} + #' @export bayesfactor_restricted.emm_list <- bayesfactor_restricted.emmGrid diff --git a/man/bayesfactor_restricted.Rd b/man/bayesfactor_restricted.Rd index cd6692c70..f85f49d93 100644 --- a/man/bayesfactor_restricted.Rd +++ b/man/bayesfactor_restricted.Rd @@ -7,6 +7,7 @@ \alias{bayesfactor_restricted.brmsfit} \alias{bayesfactor_restricted.blavaan} \alias{bayesfactor_restricted.emmGrid} +\alias{bayesfactor_restricted.matrix} \alias{bayesfactor_restricted.data.frame} \title{Bayes Factors (BF) for Order Restricted Models} \usage{ @@ -50,6 +51,14 @@ bf_restricted(posterior, ...) ... ) +\method{bayesfactor_restricted}{matrix}( + posterior, + hypothesis, + prior = NULL, + verbose = TRUE, + ... +) + \method{bayesfactor_restricted}{data.frame}( posterior, hypothesis, From 864c435c50a51889435a926a2481887dc9222193 Mon Sep 17 00:00:00 2001 From: "Mattan S. Ben-Shachar" Date: Sat, 9 May 2026 00:24:06 +0300 Subject: [PATCH 27/36] air --- R/format.R | 88 +++++++++++++++++++++++++++++------------------------- 1 file changed, 47 insertions(+), 41 deletions(-) diff --git a/R/format.R b/R/format.R index 0cbfef169..c2286e569 100644 --- a/R/format.R +++ b/R/format.R @@ -1,12 +1,14 @@ #' @export -format.describe_posterior <- function(x, - cp = NULL, - digits = 2, - format = "text", - ci_string = "CI", - caption = NULL, - subtitles = NULL, - ...) { +format.describe_posterior <- function( + x, + cp = NULL, + digits = 2, + format = "text", + ci_string = "CI", + caption = NULL, + subtitles = NULL, + ... +) { # reshape CI if (is.data.frame(x) && insight::n_unique(x$CI) > 1) { att <- attributes(x) @@ -82,16 +84,17 @@ format.rope <- format.describe_posterior # special handling for bayes factors ------------------ - #' @export -format.bayesfactor_models <- function(x, - digits = 3, - log = FALSE, - show_names = TRUE, - format = "text", - caption = NULL, - exact = TRUE, - ...) { +format.bayesfactor_models <- function( + x, + digits = 3, + log = FALSE, + show_names = TRUE, + format = "text", + caption = NULL, + exact = TRUE, + ... +) { BFE <- x denominator <- attr(BFE, "denominator") grid.type <- attr(BFE, "BF_method") @@ -154,13 +157,15 @@ format.bayesfactor_models <- function(x, #' @export -format.bayesfactor_inclusion <- function(x, - digits = 3, - log = FALSE, - format = "text", - caption = NULL, - exact = TRUE, - ...) { +format.bayesfactor_inclusion <- function( + x, + digits = 3, + log = FALSE, + format = "text", + caption = NULL, + exact = TRUE, + ... +) { priorOdds <- attr(x, "priorOdds") matched <- attr(x, "matched") @@ -207,13 +212,15 @@ format.bayesfactor_inclusion <- function(x, #' @export -format.bayesfactor_restricted <- function(x, - digits = 3, - log = FALSE, - format = "text", - caption = NULL, - exact = TRUE, - ...) { +format.bayesfactor_restricted <- function( + x, + digits = 3, + log = FALSE, + format = "text", + caption = NULL, + exact = TRUE, + ... +) { BFE <- as.data.frame(x) # Format @@ -252,13 +259,15 @@ format.bayesfactor_restricted <- function(x, #' @export -format.bayesfactor_parameters <- function(x, - cp = NULL, - digits = 3, - log = FALSE, - format = "text", - exact = TRUE, - ...) { +format.bayesfactor_parameters <- function( + x, + cp = NULL, + digits = 3, + log = FALSE, + format = "text", + exact = TRUE, + ... +) { null <- attr(x, "hypothesis") direction <- attr(x, "direction") @@ -286,7 +295,6 @@ format.bayesfactor_parameters <- function(x, caption <- c(caption, "blue") } - # format null-value if (length(null) == 1) { null <- insight::format_value(null, digits = digits, protect_integers = TRUE) @@ -294,7 +302,6 @@ format.bayesfactor_parameters <- function(x, null <- insight::format_ci(null[1], null[2], ci = NULL, digits = digits) } - # footer if (is.null(format) || format == "text") { footer <- list( @@ -316,7 +323,6 @@ format.bayesfactor_parameters <- function(x, )) } - # match and split at components if (!is.null(cp) && !all(is.na(match(cp$Parameter, out$Parameter)))) { out <- insight::print_parameters( From fcc2d9b309b63e70c6d90acaaa5463fc6949f5db Mon Sep 17 00:00:00 2001 From: "Mattan S. Ben-Shachar" Date: Sat, 9 May 2026 00:24:21 +0300 Subject: [PATCH 28/36] bf_res show_names --- R/format.R | 5 +++++ R/print.R | 2 ++ 2 files changed, 7 insertions(+) diff --git a/R/format.R b/R/format.R index c2286e569..2f4a56640 100644 --- a/R/format.R +++ b/R/format.R @@ -216,6 +216,7 @@ format.bayesfactor_restricted <- function( x, digits = 3, log = FALSE, + show_names = TRUE, format = "text", caption = NULL, exact = TRUE, @@ -235,6 +236,10 @@ format.bayesfactor_restricted <- function( colnames(BFE)[colnames(BFE) == "p_prior"] <- "P(Prior)" colnames(BFE)[colnames(BFE) == "p_posterior"] <- "P(Posterior)" + if (isTRUE(show_names) && !is.null(rownames(BFE))) { + BFE$Hypothesis <- paste0("[", rownames(BFE), "] ", BFE$Hypothesis) + } + # footer if (is.null(format) || format == "text") { footer <- list( diff --git a/R/print.R b/R/print.R index 6e1a241e7..4738087b1 100644 --- a/R/print.R +++ b/R/print.R @@ -205,6 +205,7 @@ print.bayesfactor_restricted <- function( x, digits = 3, log = FALSE, + show_names = FALSE, caption = "Bayes Factor (Order-Restriction)", ... ) { @@ -212,6 +213,7 @@ print.bayesfactor_restricted <- function( x = x, digits = digits, log = log, + show_names = show_names, caption = caption, ... ) From f1ecb2e3c57600e9e57b21614fae1320aa0117fb Mon Sep 17 00:00:00 2001 From: "Mattan S. Ben-Shachar" Date: Sat, 9 May 2026 00:32:16 +0300 Subject: [PATCH 29/36] Update bayes_factors[WIP].Rmd --- vignettes/bayes_factors[WIP].Rmd | 459 +++++++++++++++++++++++-------- 1 file changed, 349 insertions(+), 110 deletions(-) diff --git a/vignettes/bayes_factors[WIP].Rmd b/vignettes/bayes_factors[WIP].Rmd index b1078b5fe..d1075e10a 100644 --- a/vignettes/bayes_factors[WIP].Rmd +++ b/vignettes/bayes_factors[WIP].Rmd @@ -40,13 +40,16 @@ knitr::opts_chunk$set( ) pkgs <- c( + "effectsize", "bayestestR", - "ggplot2", "see", - "rstanarm", "bridgesampling" - # "BayesFactor", "emmeans", "logspline", "lme4", "ggplot2", - # "see", "insight", "knitr", "effectsize", - + "ggplot2", + "see", + "rstanarm", + "bridgesampling", + "emmeans", + "logspline" ) + if (!all(sapply(pkgs, require, quietly = TRUE, character.only = TRUE))) { knitr::opts_chunk$set(eval = FALSE) } else { @@ -60,7 +63,7 @@ with their separation often marked by their opinion of the **Bayes factor**. In short, one school of thought (e.g., the *Amsterdam school*, led by [E. J. Wagenmakers](https://www.bayesianspectacles.org/)) advocate its use, and emphasize its qualities as a statistical index, while another point to its limits and prefer, instead, the precise description of posterior distributions (using [CIs](https://easystats.github.io/bayestestR/reference/hdi.html), [ROPEs](https://easystats.github.io/bayestestR/reference/rope.html), etc.). -The `bayestestR` package does **not** take a side in this debate, +The `bayestestR` package does **not** take a side in this debate, and offers tools to carry out analysis irrespective of the school you subscribe to. Instead, it strongly supports the notion of an *informed choice*: @@ -70,41 +73,42 @@ Having said that, here's an introduction to Bayes factors :) # The Bayes Factor -There are many ways to conceptualize Bayes factors (BFs), but fundamentally: +There are many ways to conceptualize Bayes factors (BFs), but fundamentally: **BFs are indices of *relative* evidence between two *hypotheses* over another**. -According to Bayes' theorem, -given a the prior probability of some hypothesis $\mathcal{H}$ ($P(\mathcal{H})$) -and some data $D$, -we can find the posterior probability of the hypothesis ($P(\mathcal{H}|D)$) -by accounting for the probability of observing that datum were the hypothesis true ($P(D|\mathcal{H})$, -also known as the *likelihood*):^[normalized by the marginal probability of observing the data, ($P(D)$, which we will soon see is often not needed.] +According to Bayes' theorem, +given a the prior probability of some hypothesis $\mathcal{H}$ ($P(\mathcal{H})$) +and some data $\mathcal{D}$, +we can find the posterior probability of the hypothesis ($P(\mathcal{H|D})$) +by accounting for the probability of observing that datum were the hypothesis true ($P(\mathcal{D|H})$, +also known as the *likelihood*):^[normalized by the marginal probability of observing the data, $P(\mathcal{D})$, which we will soon see is often not needed.] $$ -P(\mathcal{H}|D) = \frac{P(D|\mathcal{H})\times P(\mathcal{H})}{P(D)} +P(\mathcal{H|D}) = \frac{P(\mathcal{D|H})\times P(\mathcal{H})}{P(\mathcal{D})} $$ -Within this context, an hypothesis is formalized through the specification of an a-priori model: -Priors on the parameters ($\Theta$) the define the data generating process. +Within this context, an hypothesis is formalized through the specification of an a-priori model: +Priors on the parameters ($\Theta$) that define the data generating process. If we have two hypothesis, we can find their posterior probability-odds as such: $$ -\underbrace{\frac{P(\mathcal{H}_1|D)}{P(\mathcal{H}_2|D)}}_{\text{Posterior Odds}} = -\underbrace{\frac{P(D|\mathcal{H}_1)}{P(D|\mathcal{H}_2)}}_{\text{Likelihood Ratio}} +\underbrace{\frac{P(\mathcal{H}_1|\mathcal{D})}{P(\mathcal{H}_2|\mathcal{D})}}_{\text{Posterior Odds}} = +\underbrace{\frac{P(\mathcal{D}|\mathcal{H}_1)}{P(\mathcal{D}|\mathcal{H}_2)}}_{\text{Likelihood Ratio}} \times \underbrace{\frac{P(\mathcal{H}_1)}{P(\mathcal{H}_2)}}_{\text{Prior Odds}} $$ -Where the *likelihood ratio* (the middle term) is the *Bayes factor* - +Where the *likelihood ratio* (the middle term) is the ***Bayes factor*** - it is the ***factor*** by which some **prior odds** have been updated _after_ observing the data to **posterior odds**. +This value can also be thought is _predictive terms_ - how well has a formalized prior model predicted the observed data compared to another formalized prior model? Thus, Bayes factors can be calculated in two ways: {#bf-definitions} - As a ratio quantifying **the relative probability of the observed data under each of the two hypotheses**: (In some contexts, these probabilities are also called *marginal likelihoods*.) $$ -BF_{12}=\frac{P(D|\mathcal{H}_1)}{P(D|\mathcal{H}_2)} +BF_{12}=\frac{P(\mathcal{D}|\mathcal{H}_1)}{P(\mathcal{D}|\mathcal{H}_2)} $$ - As **the degree of shift in prior beliefs** about the relative credibility of @@ -116,12 +120,9 @@ BF_{12}=\frac{\text{Posterior Odds}_{12}}{\text{Prior Odds}_{12}} $$ +`{bayestestR}` provides functions for computing Bayes factors in two different contexts: - - -Here we provide functions for computing Bayes factors in two different contexts: - -- **Comparing statistical models that differ on their priors, representing two competing hypotheses** +- **Comparing statistical models that differ on their priors which represent two competing hypotheses** - **Testing single estimates (parameters, coefficients, transformed parameters) within a given model** # 1. Comparing Models using Bayes Factors {#bayesfactor_models} @@ -131,19 +132,19 @@ Let's take a look at the _kid IQ_ dataset from the `{rstanarm}` package. ```{r} data("kidiq", package = "rstanarm") -kidiq <- kidiq[c("kid_score", "mom_hs")] -kidiq$mom_hs <- factor(kidiq$mom_hs, levels = 0:1, labels = c("no", "yes")) +kidiq <- subset(kidiq, select = c(kid_score, mom_hs)) +kidiq <- transform(kidiq, mom_hs = factor(mom_hs, levels = 0:1, labels = c("no", "yes"))) head(kidiq) ``` -We'll be typing to answer a simple question: +We'll be trying to answer a simple question: what is the mean difference in IQ scores between children whose mothers completed high-school and those whose mothers did not complete high school (as indicated by the `mom_hs` variable). -There are many hypothesis we might have about this difference. Let's start by examining: +There are many hypothesis we might have about this difference. Let's start by examining: -- $\mathcal{H}_0$: There's no difference in IQ between the two groups. -- $\mathcal{H}_1$: The difference is probably around 20 point in favor of kids whose mothers completed high school. +- $\mathcal{H}_0$: There's no difference in IQ between the two groups. +- $\mathcal{H}_1$: The difference is probably around 20 point in favor of kids whose mothers completed high school. - $\mathcal{H}_2$: A more conservative hypothesis that the difference, if it exists, is probably no more than about 5 point in either direction. Let's plot these: @@ -151,23 +152,25 @@ Let's plot these: ```{r, echo=FALSE} p_prior0 <- ggplot() + geom_vline(xintercept = 0, linetype = "dashed") + - geom_segment(aes(x = 0, xend = 0, y = 0, yend = 1), - linewidth = 1, color = "royalblue") + - geom_point(aes(x = 0, y = 1), - size = 3, color = "royalblue") + + geom_segment( + aes(x = 0, xend = 0, y = 0, yend = 1), + linewidth = 1, + color = "royalblue" + ) + + geom_point(aes(x = 0, y = 1), size = 3, color = "royalblue") + theme(axis.text.y = element_blank()) + expand_limits(y = 1.5, x = c(-50, 50)) + labs( x = "Difference", y = NULL, - title = expression(H[0]~":"~theta==0) + title = expression(H[0] ~ ":" ~ theta == 0) ) p_prior1 <- ggplot() + stat_function( - geom = "area", + geom = "area", fun = dnorm, - args = list(mean = 20, sd = 10), + args = list(mean = 20, sd = 10), xlim = c(-50, 50), fill = "royalblue", ) + @@ -177,14 +180,14 @@ p_prior1 <- ggplot() + labs( x = "Difference", y = NULL, - title = expression(H[1]~":"~theta%~%norma(20, 10^2)) + title = expression(H[1] ~ ":" ~ theta %~% norma(20, 10^2)) ) p_prior2 <- ggplot() + stat_function( - geom = "area", + geom = "area", fun = dnorm, - args = list(mean = 0, sd = 5), + args = list(mean = 0, sd = 5), xlim = c(-50, 50), fill = "royalblue", ) + @@ -194,11 +197,10 @@ p_prior2 <- ggplot() + labs( x = "Difference", y = NULL, - title = expression(H[2]~":"~theta%~%norma(0, 5^2)) + title = expression(H[2] ~ ":" ~ theta %~% norma(0, 5^2)) ) plots(p_prior0, p_prior1, p_prior2, n_columns = 1) - ``` We can build models with these different priors with `{brms}` or `{rstanarm}`:^[We will be using `{rstanarm}` throughout this vignette, but `bayestestR` also supports `{brms}`, `{blavaan}`, `{rstan}`, `{cmdstanr}`, `{BayesFactor}` and more.] @@ -206,39 +208,47 @@ We can build models with these different priors with `{brms}` or `{rstanarm}`:^[ In any case, note the we will always require _many_ posterior samples for the stability of our BF estimation (typically 10 times more than what we would need for posterior estimation alone; @gronau2020bridgesampling). ```{r} +library(rstanarm) + mod_H0 <- stan_glm( kid_score ~ 1, family = gaussian(), data = kidiq, - - diagnostic_file = file.path(tempdir(), "df0.csv"), # required for BF computation - - chains = 10, iter = 5000, warmup = 1000, - refresh = 0 + + chains = 10, + iter = 5000, + warmup = 1000, + refresh = 0, + # required for BF computation + diagnostic_file = file.path(tempdir(), "df0.csv") ) mod_H1 <- stan_glm( kid_score ~ mom_hs, family = gaussian(), data = kidiq, - + prior = normal(location = 20, scale = 10), - diagnostic_file = file.path(tempdir(), "df1.csv"), - - chains = 10, iter = 5000, warmup = 1000, - refresh = 0 + + chains = 10, + iter = 5000, + warmup = 1000, + refresh = 0, + diagnostic_file = file.path(tempdir(), "df1.csv") ) mod_H2 <- stan_glm( kid_score ~ mom_hs, family = gaussian(), data = kidiq, - + prior = normal(location = 0, scale = 5), - diagnostic_file = file.path(tempdir(), "df2.csv"), - - chains = 10, iter = 5000, warmup = 1000, - refresh = 0 + + chains = 10, + iter = 5000, + warmup = 1000, + refresh = 0, + diagnostic_file = file.path(tempdir(), "df2.csv") ) ``` @@ -255,12 +265,18 @@ print(bfs, show_names = TRUE) ``` -We can see that the both models that allow for a difference between the groups -are much more supported by the data - +We can see that both models that allow for a difference between the groups +are much more supported by the data - with $BF>`r insight::format_value(exp(bfs$log_BF[2]))`$ - compared to the null (intercept only). -Due to the transitive property of Bayes factors, +Note that **interpretation guides** for Bayes factors can be found in the `effectsize` package: + +```{r} +effectsize::interpret_bf(bfs$log_BF[1:2], log = TRUE) +``` + +Due to the transitive property of Bayes factors, we can easily change the reference model to the model representing $\mathcal{H}_2$: ```{r update_models1} @@ -269,7 +285,7 @@ bfs2 <- update(bfs, reference = 2, subset = 1) print(bfs2, show_names = TRUE) ``` -As we can see, the data supports the a-priori model that suggests a positive difference almost 4 times over the model that suggests a small difference. +The data supports the a-priori model that suggests a positive difference almost 4 times over the model that suggests a small difference. We can also get a matrix of Bayes factors of all the pairwise model comparisons: @@ -291,17 +307,17 @@ And indeed both models 1 and 2's posteriors reflect this difference: ```{r, echo=FALSE} plots( - plot(hdi(mod_H1)) + + plot(hdi(mod_H1)) + labs(y = NULL, title = "Model 1") + - expand_limits(x = c(-10, 30)) + + coord_cartesian(xlim = c(-20, 20)) + scale_y_discrete(expand = expansion(0.1, 0)) + guides(fill = "none"), - plot(hdi(mod_H2)) + - expand_limits(x = c(-10, 30)) + + plot(hdi(mod_H2)) + + coord_cartesian(xlim = c(-20, 20)) + scale_y_discrete(expand = expansion(0.1, 0)) + - labs(y = NULL, title = "Model 2"), - - n_columns = 1, + labs(y = NULL, title = "Model 2"), + + n_columns = 1, guides = "collect" ) ``` @@ -310,11 +326,11 @@ Note that these posterior distributions are _very_ similar, but BFs do not compare posterior models - only _a-priori_ models! For this reason, computing BFs only makes sense if we are able to formulate our hypotheses -into distinct priors. +into distinct priors. ## The BIC approximation -It is also possible to compute approximate Bayes factors for the comparison of frequentist models. +It is also possible to compute *approximate* Bayes factors for the comparison of *frequentist* models (😱). This is done by comparing BIC indices, allowing a Bayesian comparison of nested as well as non-nested frequentist models [@wagenmakers2007practical]. @@ -334,9 +350,9 @@ bayesfactor_models(mod_H1f, denominator = mod_H0f) ## Model averaging In the previous section, we discussed the direct comparison of two models to -determine if an hypothesis is supported by the data. -However, in many cases there are too many models to consider, -or perhaps it is not straightforward which models we should be comparing to determine if an effect is supported by the data. +determine if a hypothesis is supported by the data. +However, in many cases there are too many models to consider, +or perhaps it is not straightforward which models we should be comparing to determine if an effect is supported by the data. For such cases, we can use Bayesian model averaging (BMA) to determine the support provided by the data for a parameter or model-term across many models. @@ -351,8 +367,8 @@ produced the observed data than models without predictor $X$?^[A model without predictor $X$ can be thought of as a model in which the parameter(s) of the predictor have been restricted to a null-point of 0.] -These Bayes factors are computed not as the ratios of marginal likelihoods, -but as **the degree of shift in prior beliefs**: +These Bayes factors are computed not as the ratios of marginal likelihoods, +but as **the degree of shift in prior beliefs**: Since each model has a prior probability, it is possible to sum the prior probability of all models that include a predictor of interest (the *prior inclusion probability*), and of all models that do not include that predictor (the *prior exclusion probability*). After the data are observed, and each model @@ -360,12 +376,14 @@ is assigned a posterior probability, we can similarly consider the sums of the posterior models' probabilities to obtain the *posterior inclusion probability* and the *posterior exclusion probability*. The change from prior inclusion odds to the posterior inclusion odds is the **Inclusion Bayes factor** -["$BF_{Inclusion}$"; @clyde2011bayesian]. +[$BF_{Inclusion}$; @clyde2011bayesian]. ```{r} (bfinc <- bayesfactor_inclusion(bfs)) ``` +(`bayesfactor_inclusion()` is meant to provide Bayes Factors per predictor, similar to JASP's *Effects* option.) + We can see that across the 3 models under consideration, models _with_ the `mom_hs` term fit the data `r insight::format_value(exp(bfinc$log_BF))` times more than the model _without_ that term. ### Averaging posteriors {#weighted_posteriors} @@ -376,10 +394,12 @@ also average the **posterior estimate** across models. ```{r} ppp <- weighted_posteriors(mod_H0, mod_H1, mod_H2) -plot(hdi(ppp$mom_hsyes)) +plot(hdi(ppp$mom_hsyes)) + + coord_cartesian(xlim = c(-20, 20)) ``` -This looks a lot like the posterior obtained from the second model, which shouldn't be surprising since about 80% of the averaged posterior comes from the second model. +This looks a lot like the posterior obtained from the second model, +which shouldn't be surprising since about 80% of the averaged posterior comes from the second model. ```{r} attr(ppp, "weights") @@ -389,7 +409,7 @@ attr(ppp, "weights") ## Order restricted models {#bayesfactor_restricted} We've already seen we can formalize hypothesis into distributional priors -(e.g., _the difference is probably no more than about 5 point in either direction._ became $theta \sim Normal(0, 5^2)$). +(e.g., _the difference is probably no more than about 5 point in either direction._ became $theta \sim Normal(0, 5^2)$). These priors are **unrestricted** - that is, **all values** between $-\infty$ and $\infty$ of all parameters in the model have some non-zero credibility (no matter how small; this is true for both the prior and posterior distribution). @@ -397,16 +417,17 @@ matter how small; this is true for both the prior and posterior distribution). But we can also formalize hypotheses as **order restrictions** [@morey_2015_blog; @morey2011bayesinterval]. -For example, we can impose an _additional_ order restriction +For example, we can impose an _additional_ order restriction that the difference _must be positive_, which we can write like this (if we had to): $$ \mathcal{H}_{2r}: theta \sim Normal(0, 5^2)\begin{bmatrix} \infty \\ 0 \end{bmatrix} $$ -By testing the probabilities of these restrictions on prior and posterior samples, -we can see how the probabilities of the restricted distributions change after observing the data. -This can be achieved with `bayesfactor_restricted()`, that compute a Bayes +By testing the probabilities of these restrictions on prior and posterior samples, +we can see how the probabilities of the restricted distributions _change_ after observing the data - +[this change is a Bayes factor](#bf-definitions). +These can be achieved with `bayesfactor_restricted()`, that compute a Bayes factor for these restricted model vs the unrestricted model. ```{r} @@ -416,39 +437,45 @@ bayesfactor_restricted(mod_H2, hypothesis = "mom_hsyes > 0") In other words, the data fits the restricted model (where the difference must be small _and positive_) twice as much as it fits the un-restircted model (where the difference must be small). -We can compare multiple restricted hypotheses. For example: that the difference isn't just positive, it's larger than 5. +We can compare multiple restricted hypotheses. For example: that the difference isn't just positive, it's larger than 4. ```{r} -bf_rstr2 <- bayesfactor_restricted(mod_H2, hypothesis = c( - positive = "mom_hsyes > 0", - strong = "mom_hsyes > 4" -)) - +bf_rstr <- bayesfactor_restricted( + mod_H2, + hypothesis = c( + positive = "mom_hsyes > 0", + strong = "mom_hsyes > 4" + ) +) ``` Here too we can obtain a matrix of BFs between all models: ```{r} -print(as.matrix(bf_rstr2), show_names = TRUE) +print(as.matrix(bf_rstr), show_names = TRUE) ``` -We can see the "strong" model is preferred over both the un-restricted model and the "positive" model. - +We can see the "strong" model is preferred over both the un-restricted model and the "positive" model. Again, we can use the transitive properties of Bayes factors to find the BF comparing $\mathcal{H}_{2r}$ and $\mathcal{H}_1$: $$ -BF_{2r,1} = BF_{2,0} \times BF_{2r,2} = \frac{P(D|\mathcal{H}_{2})}{P(D|\mathcal{H}_0)} \times \frac{P(D|\mathcal{H}_{2r})}{P(D|\mathcal{H}_2)} = \frac{P(D|\mathcal{H}_{2r})}{P(D|\mathcal{H}_0)} +\begin{align} +BF_{2r,1} & = BF_{2,0} \times BF_{2r,2} \\ +& = \frac{P(\mathcal{D}|\mathcal{H}_{2})}{P(\mathcal{D}|\mathcal{H}_0)} +\times \frac{P(\mathcal{D}|\mathcal{H}_{2r})}{P(\mathcal{D}|\mathcal{H}_2)} \\ +& = \frac{P(\mathcal{D}|\mathcal{H}_{2r})}{P(\mathcal{D}|\mathcal{H}_0)} +\end{align} $$ ```{r} BF_2.0 <- as.numeric(bfs)[2] -BF_2r.2 <- as.numeric(bf_rstr) +BF_2r.2 <- as.numeric(bf_rstr)[2] (BF_2r.0 <- BF_2.0 * BF_2r.2) ``` -So the data support the hypothesis that the difference is small but strictly positive +So the data support the hypothesis that the difference is small but strictly positive `r insight::format_value(BF_2r.0)` times more than the hypothesis that the difference is exactly 0. **Because these restrictions are on the prior distribution, they are only appropriate for testing pre-planned (*a priori*) hypotheses, and should not be used for any post hoc comparisons [@morey_2015_blog].** @@ -473,40 +500,253 @@ mod_odor <- stan_glm( score ~ condition, family = gaussian(), data = disgust, - - prior = normal(location = 0, scale = 4), - diagnostic_file = file.path(tempdir(), "df2.csv"), - - chains = 10, iter = 5000, warmup = 1000, - refresh = 0 + + prior = normal(location = 0, scale = 2), + + contrasts = list(condition = "contr.equalprior_pairs"), + + chains = 10, + iter = 5000, + warmup = 1000, + refresh = 0, + diagnostic_file = file.path(tempdir(), "df3.csv") ) + +summary(mod_odor) +``` + +**NOTE**: See the *Specifying Correct Priors for Factors with More Than 2 Levels* appendix below for more details on the contrast coding used here. + +Let's obtain the prior and posterior distributions of the condition means using `posterior_epred()`. + +```{r} +mod_odor.prior <- unupdate(mod_odor) # get the priors-only model + +library(emmeans) + +disgust_means <- emmeans(mod_odor, ~condition) +disgust_means.prior <- emmeans(mod_odor.prior, ~condition) ``` +Our hypothesis is that the moral harshness ratings are lowest in the lemon condition, higher in the control condition, and highest in the sulfur condition - in other words, there is an _order_ of: $\text{lemon} < \text{control} < \text{sulfur}$. + +We can formalize this hypothesis as an order restriction on the means of the three conditions: + ```{r} -hyps <- c( - "control < lemon" = "conditionlemon < 0", - "control < lemon" = "conditionlemon < 0", +bayesfactor_restricted( + posterior = disgust_means, + prior = disgust_means.prior, + hypothesis = "lemon < control & control < sulfur" +) +``` + +We can see that a-priori, this specific ordering of the 3 means has a proability of $\frac{1}{6}$ (1 of 6 possible orderings of 3 values), but after observing the data, this ordering is about ~4 times more likely than any other ordering. + +The transitive properties of Bayes factors can also be used to compute a Bayes factor for **dividing** hypotheses - +that is for two *complementary* opposing one-sided hypotheses [@morey2014simple]. + +For example, above we compared $\mathcal{H}_{+}: \theta > 0$ - *the difference is positive* +to the null $\mathcal{H}_{-}: \theta < 0$: *the difference is negative*: + +$$ +\begin{align} +BF_{+,-} & = BF_{+,0} \times BF_{0,-} \\ +& = \frac{P(\mathcal{D}|\mathcal{H}_{+})}{P(\mathcal{D}|\mathcal{H}_0)} +\times \frac{P(\mathcal{D}|\mathcal{H}_{0})}{P(\mathcal{D}|\mathcal{H}_-)} \\ +& = \frac{P(\mathcal{D}|\mathcal{H}_{+})}{P(\mathcal{D}|\mathcal{H}_{-})} +\end{align} +$$ + + +```{R} +bf_div <- bayesfactor_restricted( + posterior = disgust_means, + prior = disgust_means.prior, + hypothesis = c( + positive = "lemon - sulfur > 0", + negative = "lemon - sulfur < 0" + ) ) -bayesfactor_restricted(mod_odor, hypothesis = hyps) +print(as.matrix(bf_div), show_names = TRUE) ``` -**NOTE**: See the *Specifying Correct Priors for Factors with More Than 2 Levels* appendix below. +The hypothesis that the lemon condition yields lower ratings than the sulfur condition +is about 60 times more supported by the data than +the hypothesis that the lemon condition has higher ratings than the sulfur condition. + +Etc... etc... we can compound as many restrictions as we want, and compare them to each other, or to the unrestricted model, or to the null model, etc. + +--- + +Overall, Bayes factors are a powerful tool for comparing the relative evidence of two _formalized_ hypotheses (i.e., hypotheses that have been formalized into distinct priors). + +Note that Bayes factors are _not_ a tool for comparing ***posterior*** models +(for such comparisons, see the [`{loo}` package](https://mc-stan.org/loo/)) - +and in fact two similar posterior models can have very different BFs if their priors are different. # 2. Testing Models' Parameters with Bayes Factors {#bayesfactor_parameters} +For testing a point null hypothesis (e.g., $\mathcal{H}_0: \theta = 0$) against some alternative non-null hypothesis (e.g., $\mathcal{H}_1: \theta \sim Normal(0, 5^2)$), a nice "short cut" can be used to obtain a Bayes factor - via the Savage-Dickey density ratio [@wagenmakers2010bayesian]. + +If we zoomed-in on the null value $\theta_0$ - what does it mean for the null's credability to have become _lower_ in the posterior distribution? +Well, since the null is less credible, that necessarily means that the alternative is _more_ credible by the same amount! + +Note that for a point null on a continuous parameter, the _probability_ of the null is always 0, and the probability of all other values is 1. However, the _density_ of the null can be non-zero, and it is this density that quantifies the credibility of the null hypothesis. This means that the prior _odds_ of the null vs the alternative are: + +$$ +\text{Prior Odds} = \frac{P(\theta \neq \theta_0)}{P(\theta=\theta_0)} = \frac{1}{P(\theta=\theta_0)} +$$ + +Likewise, the posterior odds of the null vs the alternative are: + +$$ +\text{Posterior Odds} = \frac{P(\theta \neq \theta_0 \mid \mathcal{D})}{P(\theta=\theta_0 \mid \mathcal{D})} + = \frac{1}{P(\theta=\theta_0 \mid \mathcal{D})} \\ +$$ + +Recall that a Bayes factor can be thought of as the degree of shift in the relative credibility of two hypotheses from the prior model to the posterior model: + +$$ +\begin{align} +BF_{10} & = \frac{\text{Posterior Odds}_{10}}{\text{Prior Odds}_{10}} += \frac{\frac{1}{P(\theta=\theta_0 \mid \mathcal{D})}}{\frac{1}{P(\theta=\theta_0)}} = \\ +& = \frac{P(\theta=\theta_0)}{P(\theta=\theta_0 \mid \mathcal{D})} +\end{align} +$$ + +In other words, it is sufficient to compare the density of the null under the prior distribution ($P(\theta=\theta_0)$) with the density of the null under posterior distribution ($P(\theta=\theta_0 \mid \mathcal{D})$) to obtain a Bayes factor comparing the null and alternative hypotheses - the degree to which the null has become more or less credible after observing the data. + +This can be done using the `bayesfactor_parameters()` - let's use it to test the null hypothesis that the difference in IQ between the two groups is exactly 0: + +```{R} +(sddr <- bayesfactor_parameters(mod_H2, null = 0)) +``` + +Looking at the Savage-Dickey density ratio for the `mom_hsyes` parameter, +we can see that the null has become substantially less credible after observing the data - +and therefore the alternative has become _more_ credible. + +```{R} +plot(sddr) +``` + +We can see that the center of the posterior distribution has shifted away from 0 (to around 10), and the density at 0 has become much smaller in the posterior distribution compared to the prior distribution suggesting that the data is less compatible with the null value of 0 that with other values overall. + +Compare the Savage-Dickey density ratio for the `mom_hsyes` parameter +with the Bayes factor comparing `mod_H2` (the alternative) and `mod_H0` (the null): + +```{R} +print(update(bfs, subset = 2), show_names = TRUE) +``` + +Not perfect, but a good approximation. + ### Testing against a null-*region* +One way of operationalizing the null-hypothesis is by setting a null _region_, such +that an effect that falls within this interval would be *practically* equivalent +to the null [@kruschke2010believe]. In our case, that means defining a range of +effects we would consider equal to no difference in IQ between the two groups. +Let's say we consider any difference between -5 and 5 points to be practically equivalent to no difference at all, +we would define our null-region as $\mathcal{H}_0: \theta \in [-5, 5]$. + +The Bayes factor for this null-region can be obtained +by comparing the change in the _relative_ credibility of the null-region $\mathcal{H}_0: \theta \in [-5, 5]$ +and the non-null region $\mathcal{H}_1: \theta \notin [-5, 5]$ from the prior to the posterior distribution - +to achieve this, we combine the logic of the Savage-Dickey density ratio +with the logic of the order-restricted Bayes factor! + +This too can be done with `bayesfactor_parameters()`, by specifying a null-region instead of a point null: + +```{R} +(sddr_region <- bayesfactor_parameters(mod_H2, null = c(-5, 5))) + +plot(sddr_region) +``` + +We can see that the null-region has become much less credible by a factor of >100 after observing the data - +suggesting that data is more compatible with non-null values than with null values, +and therefore the alternative (that the difference is outside of the [-5, 5] range) has become relatively much more credible. + ### Directional hypotheses +We can also compute Bayes factors for directional hypotheses ("one sided"), +if we have a prior hypotheses about the direction of the effect. This is similiar to the _dividing_ Bayes factor discussed above, but we are still comparing the (directional) alternative to the null (not between two directional hypotheses). +This too can be done by setting an *order restriction* on the prior and posterior distributions [@morey2014simple]. +For example, if we have a prior hypothesis that *the difference in IQ between the two groups is positive*, +the alternative will be restricted to the region to the right of the null (point or interval): + +```{R} +(sddr_directional <- bayesfactor_parameters(mod_H2, null = c(-5, 5), direction = "right")) + +plot(sddr_directional) +``` + +As we can see, given that we have an *a priori* assumption about the direction of the difference, the evidence against the null is even stronger. Again, given this order restriction on the alternative hypothesis, the posterior mass has substantially shifted away and outside the null value, giving some extreme evidence against the null and in favor of the alternative. + ### Support intervals and curves {#si} -A continuous extension of the density ratio... +So far we've seen that Bayes factors quantify relative support between competing hypotheses. However, we can also ask: -# Appendices +> **Upon observing the data, the credibility of which of the parameter’s values has increased (or decreased)?** + +For example, we've seen that the point null has become less credible after observing the data, +but we might also ask which values have **gained** credibility given the observed data? +The resulting range of values is called **the support interval** +as it indicates which values are supported by the data [@wagenmakers2018SI]. +We can do this by once again comparing the prior and posterior distributions and checking where the posterior densities are higher than the prior densities. +In `bayestestR`, this can be achieved with the `si()` function: + +```{r} +my_first_si <- si(mod_H2, BF = 1, verbose = FALSE) + +print(my_first_si) +``` + +The argument `BF = 1` indicates that +we want the interval to contain values that have gained support by a factor of at least 1 +(that is, _any_ support at all). + +Note that this is different from a credible interval, which contains values that have high credibility in the posterior distribution, regardless of how much their credibility has changed from the prior distribution: + +```{r} +hdi(mod_H2) +``` + +Visually, we can see that the credibility of all the values within this interval +has increased (and likewise the credibility of all the values outside this +interval has decreased): + +```{r} +plot(my_first_si) +``` + +We can also see the this support interval excludes the point null +(0) - whose credibility we've already seen has decreased by the observed data. +This emphasizes the relationship between the support interval and the Bayes +factor: + +> "The interpretation of such intervals would be analogous to how a frequentist +confidence interval contains all the parameter values that would not have been +rejected if tested at level $\alpha$. For instance, a BF = 1/3 support interval +encloses all values of theta for which the updating factor is not stronger than +3 against." [@wagenmakers2018SI] + +Thus, the choice of BF (the level of support the interval should indicate) +depends on what we want our interval to represent: + +- A $BF = 1$ contains values whose credibility has merely not decreased by +observing the data. +- A $BF > 1$ contains values who received more impressive support from the data. +- A $BF < 1$ contains values whose credibility has *not* been impressively +decreased by observing the data. Testing against values outside this interval +will produce a Bayes factor larger than $1/BF$ in support of the alternative. + +# Appendices ```{r} @@ -518,4 +758,3 @@ A continuous extension of the density ratio... # References - From 68ef698496eec04ad70879a71b05f16f481919d1 Mon Sep 17 00:00:00 2001 From: "Mattan S. Ben-Shachar" Date: Sat, 9 May 2026 01:00:13 +0300 Subject: [PATCH 30/36] close #739 --- DESCRIPTION | 2 +- NAMESPACE | 1 + R/bayesfactor_models.R | 29 +++++++++++++++++++---------- man/distribution.Rd | 5 +++-- 4 files changed, 24 insertions(+), 13 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 9e765d5ce..68d7b8617 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -79,7 +79,7 @@ Suggests: betareg, BH, blavaan, - bridgesampling, + bridgesampling (>= 1.2-1), brms, collapse, curl, diff --git a/NAMESPACE b/NAMESPACE index 174199013..d921b9432 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -24,6 +24,7 @@ S3method(as.vector,p_direction) S3method(bayesfactor_inclusion,BFBayesFactor) S3method(bayesfactor_inclusion,bayesfactor_models) S3method(bayesfactor_models,BFBayesFactor) +S3method(bayesfactor_models,CmdStanMCMC) S3method(bayesfactor_models,blavaan) S3method(bayesfactor_models,brmsfit) S3method(bayesfactor_models,default) diff --git a/R/bayesfactor_models.R b/R/bayesfactor_models.R index b35425546..4fcbc4c06 100644 --- a/R/bayesfactor_models.R +++ b/R/bayesfactor_models.R @@ -277,16 +277,26 @@ bayesfactor_models.default <- function(..., denominator = 1, verbose = TRUE) { # Warn n_samps <- sapply(mods, function(x) { alg <- insight::find_algorithm(x) + if (is.null(alg)) { + return(NA_integer_) + } if (is.null(alg$iterations)) { alg$iterations <- alg$sample } (alg$iterations - alg$warmup) * alg$chains }) - if (any(n_samps < 4e4) && verbose) { - insight::format_warning( - "Bayes factors might not be precise.", - "For precise Bayes factors, sampling at least 40,000 posterior samples is recommended." - ) + if (verbose) { + if (any(n_samps < 4e4, na.rm = TRUE)) { + insight::format_warning( + "Bayes factors might not be precise.", + "For precise Bayes factors, sampling at least 40,000 posterior samples is recommended." + ) + } else if (any(is.na(n_samps))) { + insight::format_alert( + "Unable to determine the number of posterior samples.", + "Bayes factors might not be precise." + ) + } } if (inherits(mods[[1]], "blavaan")) { @@ -296,11 +306,7 @@ bayesfactor_models.default <- function(..., denominator = 1, verbose = TRUE) { } else { res <- .bayesfactor_models_stan_REG(mods, denominator, verbose) bf_method <- "marginal likelihoods (bridgesampling)" - unsupported_models <- if (inherits(mods[[1]], c("stanfit", "CmdStanFit"))) { - TRUE - } else { - FALSE - } + unsupported_models <- inherits(mods[[1]], c("stanfit", "CmdStanFit")) } .bf_models_output( @@ -406,6 +412,9 @@ bayesfactor_models.blavaan <- bayesfactor_models.stanreg bayesfactor_models.stanfit <- bayesfactor_models.stanreg +#' @export +bayesfactor_models.CmdStanMCMC <- bayesfactor_models.stanreg + #' @export bayesfactor_models.BFBayesFactor <- function(..., verbose = TRUE) { models <- c(...) diff --git a/man/distribution.Rd b/man/distribution.Rd index 60021f273..97c50b53e 100644 --- a/man/distribution.Rd +++ b/man/distribution.Rd @@ -66,7 +66,8 @@ distribution_uniform(n, min = 0, max = 1, random = FALSE, ...) \item{...}{Arguments passed to or from other methods.} -\item{n}{number of observations.} +\item{n}{number of observations. If \code{length(n) > 1}, the length + is taken to be the number required.} \item{random}{Generate near-perfect or random (simple wrappers for the base R \verb{r*} functions) distributions. When \code{random = FALSE}, these function return @@ -90,7 +91,7 @@ distribution_uniform(n, min = 0, max = 1, random = FALSE, ...) \item{sd}{vector of standard deviations.} -\item{mu}{vector of mean \eqn{\mu}{mu}.} +\item{mu}{alternative parametrization via mean: see \sQuote{Details}.} \item{phi}{Corresponding to \code{glmmTMB}'s implementation of nbinom distribution, where \code{size=mu/phi}.} From 788ac08c0fa7313914680998d3c9725f2a741ad7 Mon Sep 17 00:00:00 2001 From: "Mattan S. Ben-Shachar" Date: Sat, 9 May 2026 01:23:13 +0300 Subject: [PATCH 31/36] cleanup --- R/utils_bayesfactor.R | 7 ++++++- R/weighted_posteriors.R | 20 +++++++------------- 2 files changed, 13 insertions(+), 14 deletions(-) diff --git a/R/utils_bayesfactor.R b/R/utils_bayesfactor.R index c73b31cdf..3fd9c5b1f 100644 --- a/R/utils_bayesfactor.R +++ b/R/utils_bayesfactor.R @@ -216,12 +216,17 @@ #' @keywords internal -get_parameters.CmdStanFit <- function(x, ...) { +get_parameters.CmdStanFit <- function(x, parameters = NULL, ...) { insight::check_if_installed("cmdstanr") out <- as.data.frame(x$draws(format = "draws_df")) out[c(".chain", ".iteration", ".draw")] <- NULL out[grepl("^lp_", colnames(out))] <- NULL + + if (!is.null(parameters)) { + out <- out[, grepl(parameters, colnames(out)), drop = FALSE] + } + out } diff --git a/R/weighted_posteriors.R b/R/weighted_posteriors.R index 82a97e8a0..3659e9607 100644 --- a/R/weighted_posteriors.R +++ b/R/weighted_posteriors.R @@ -187,10 +187,6 @@ weighted_posteriors.stanreg <- function( model_tab <- .get_model_table(BFMods, priorOdds = prior_odds) postProbs <- model_tab$postProbs - # Compute weighted number of samples - iterations <- min(sapply(Mods, .total_samps)) - weighted_samps <- round(iterations * postProbs) - # extract parameters params <- lapply( Mods, @@ -200,6 +196,10 @@ weighted_posteriors.stanreg <- function( parameters = parameters ) + # Compute weighted number of samples + iterations <- min(sapply(params, nrow)) + weighted_samps <- round(iterations * postProbs) + .weighted_posteriors(params, weighted_samps, missing, mnames) } @@ -209,6 +209,9 @@ weighted_posteriors.brmsfit <- weighted_posteriors.stanreg #' @export weighted_posteriors.blavaan <- weighted_posteriors.stanreg +#' @export +weighted_posteriors.CmdStanMCMC <- weighted_posteriors.stanreg + #' @rdname weighted_posteriors #' @export weighted_posteriors.BFBayesFactor <- function( @@ -298,12 +301,3 @@ weighted_posteriors.BFBayesFactor <- function( attr(res, "weights") <- weights return(res) } - -#' @keywords internal -.total_samps <- function(mod) { - x <- insight::find_algorithm(mod) - if (is.null(x$iterations)) { - x$iterations <- x$sample - } - x$chains * (x$iterations - x$warmup) -} From b2130118b0069fb9ec26066c453dc1309f07e049 Mon Sep 17 00:00:00 2001 From: "Mattan S. Ben-Shachar" Date: Sun, 10 May 2026 09:35:32 +0300 Subject: [PATCH 32/36] Methods for CmdStanFit instead of CmdStanMCMC --- NAMESPACE | 3 ++- R/bayesfactor_models.R | 2 +- R/weighted_posteriors.R | 2 +- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index d921b9432..bb68628c3 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -24,7 +24,7 @@ S3method(as.vector,p_direction) S3method(bayesfactor_inclusion,BFBayesFactor) S3method(bayesfactor_inclusion,bayesfactor_models) S3method(bayesfactor_models,BFBayesFactor) -S3method(bayesfactor_models,CmdStanMCMC) +S3method(bayesfactor_models,CmdStanFit) S3method(bayesfactor_models,blavaan) S3method(bayesfactor_models,brmsfit) S3method(bayesfactor_models,default) @@ -641,6 +641,7 @@ S3method(unupdate,brmsfit_multiple) S3method(unupdate,stanreg) S3method(update,bayesfactor_models) S3method(weighted_posteriors,BFBayesFactor) +S3method(weighted_posteriors,CmdStanFit) S3method(weighted_posteriors,blavaan) S3method(weighted_posteriors,brmsfit) S3method(weighted_posteriors,data.frame) diff --git a/R/bayesfactor_models.R b/R/bayesfactor_models.R index 4fcbc4c06..2b3f60b07 100644 --- a/R/bayesfactor_models.R +++ b/R/bayesfactor_models.R @@ -413,7 +413,7 @@ bayesfactor_models.stanfit <- bayesfactor_models.stanreg #' @export -bayesfactor_models.CmdStanMCMC <- bayesfactor_models.stanreg +bayesfactor_models.CmdStanFit <- bayesfactor_models.stanreg #' @export bayesfactor_models.BFBayesFactor <- function(..., verbose = TRUE) { diff --git a/R/weighted_posteriors.R b/R/weighted_posteriors.R index 3659e9607..fe72a803b 100644 --- a/R/weighted_posteriors.R +++ b/R/weighted_posteriors.R @@ -210,7 +210,7 @@ weighted_posteriors.brmsfit <- weighted_posteriors.stanreg weighted_posteriors.blavaan <- weighted_posteriors.stanreg #' @export -weighted_posteriors.CmdStanMCMC <- weighted_posteriors.stanreg +weighted_posteriors.CmdStanFit <- weighted_posteriors.stanreg #' @rdname weighted_posteriors #' @export From f22967073710aca1e72d75fbc42c503d839904ea Mon Sep 17 00:00:00 2001 From: "Mattan S. Ben-Shachar" Date: Sun, 10 May 2026 12:46:55 +0300 Subject: [PATCH 33/36] drop internal function in favor of insight's method [skip] --- DESCRIPTION | 2 +- R/utils_bayesfactor.R | 16 ---------------- 2 files changed, 1 insertion(+), 17 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 68d7b8617..0dbd1b422 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -66,7 +66,7 @@ Description: Provides utilities to describe posterior Depends: R (>= 3.6) Imports: - insight (>= 1.5.0), + insight (>= 1.5.0.6), datawizard (>= 1.3.1), graphics, methods, diff --git a/R/utils_bayesfactor.R b/R/utils_bayesfactor.R index 3fd9c5b1f..736af3d07 100644 --- a/R/utils_bayesfactor.R +++ b/R/utils_bayesfactor.R @@ -215,22 +215,6 @@ .clean_priors_and_posteriors.CmdStanFit <- .clean_priors_and_posteriors.stanfit -#' @keywords internal -get_parameters.CmdStanFit <- function(x, parameters = NULL, ...) { - insight::check_if_installed("cmdstanr") - - out <- as.data.frame(x$draws(format = "draws_df")) - out[c(".chain", ".iteration", ".draw")] <- NULL - out[grepl("^lp_", colnames(out))] <- NULL - - if (!is.null(parameters)) { - out <- out[, grepl(parameters, colnames(out)), drop = FALSE] - } - - out -} - - # BMA --------------------------------------------------------------------- #' @keywords internal From eec1061aaa607e4dd6aaf1a725435b84583cc721 Mon Sep 17 00:00:00 2001 From: "Mattan S. Ben-Shachar" Date: Mon, 11 May 2026 20:04:41 +0300 Subject: [PATCH 34/36] finalize BF vignette --- vignettes/bayes_factors.Rmd | 1481 ++++++++++++------------------ vignettes/bayes_factors[WIP].Rmd | 760 --------------- vignettes/bibliography.bib | 13 +- 3 files changed, 594 insertions(+), 1660 deletions(-) delete mode 100644 vignettes/bayes_factors[WIP].Rmd diff --git a/vignettes/bayes_factors.Rmd b/vignettes/bayes_factors.Rmd index cd986c397..3c2bb0b41 100644 --- a/vignettes/bayes_factors.Rmd +++ b/vignettes/bayes_factors.Rmd @@ -27,7 +27,9 @@ This vignette can be referred to by citing the following: ```{r setup, include=FALSE} library(knitr) + options(knitr.kable.NA = "", digits = 2) + knitr::opts_chunk$set( echo = TRUE, comment = ">", @@ -38,1195 +40,876 @@ knitr::opts_chunk$set( ) pkgs <- c( - "rstanarm", "BayesFactor", "emmeans", "logspline", "lme4", "ggplot2", - "see", "insight", "knitr", "effectsize", "bayestestR" + "effectsize", + "bayestestR", + "ggplot2", + "see", + "rstanarm", + "bridgesampling", + "emmeans", + "marginaleffects", + "logspline" ) + if (!all(sapply(pkgs, require, quietly = TRUE, character.only = TRUE))) { knitr::opts_chunk$set(eval = FALSE) -} - -set.seed(4) - -if (require("ggplot2") && require("see")) { +} else { theme_set(theme_modern()) } ``` -The adoption of the Bayesian framework for applied statistics, especially in the -social and psychological sciences, seems to be developing in two distinct -directions. One of the key topics marking their separation is their opinion -about the **Bayes factor**. In short, one school of thought (e.g., the *Amsterdam school*, led by [E. J. Wagenmakers](https://www.bayesianspectacles.org/)) -advocate its use, and emphasize its qualities as a statistical index, while -another point to its limits and prefer, instead, the precise description of -posterior distributions (using -[CIs](https://easystats.github.io/bayestestR/reference/hdi.html), -[ROPEs](https://easystats.github.io/bayestestR/reference/rope.html), etc.). +The adoption of the Bayesian framework, especially in the +social and psychological sciences, seems to be developing in two distinct directions, +with their separation often marked by their opinion of the **Bayes factor**. +In short, one school of thought (e.g., the *Amsterdam school*, led by [E. J. Wagenmakers](https://www.bayesianspectacles.org/)) advocate its use, and emphasize its qualities as a statistical index, +while another point to its limits and prefer, instead, the precise description of posterior distributions (using [CIs](https://easystats.github.io/bayestestR/reference/hdi.html), [ROPEs](https://easystats.github.io/bayestestR/reference/rope.html), etc.). -The `bayestestR` package does **not** take a side in this debate, and offers tools to carry out analysis irrespective of the school you subscribe to. Instead, it strongly supports the notion of an *informed choice*: +The `bayestestR` package does **not** take a side in this debate, +and offers tools to carry out analysis irrespective of the school you subscribe to. +Instead, it strongly supports the notion of an *informed choice*: **discover the methods, learn about them, understand them, try them, and decide for yourself**. Having said that, here's an introduction to Bayes factors :) - # The Bayes Factor -**Bayes Factors (BFs) are indices of *relative* evidence of one "model" over another**. - -In their role as a hypothesis testing index, they are to Bayesian framework what -a $p$-value is to the **classical/frequentist framework**. In significance-based -testing, $p$-values are used to assess how unlikely are the observed data if the -**null hypothesis** were true, while in the **Bayesian model selection framework**, -Bayes factors assess evidence for different models, each model corresponding to -a specific hypothesis. +There are many ways to conceptualize Bayes factors (BFs), but fundamentally: +**BFs are indices of *relative* evidence between two *hypotheses* over another**. -According to Bayes' theorem, we can update prior probabilities of some model $M$ -($P(M)$) to posterior probabilities ($P(M|D)$) after observing some datum $D$ by -accounting for the probability of observing that datum given the model -($P(D|M)$, also known as the *likelihood*): +According to Bayes' theorem, +given a the prior probability of some hypothesis $\mathcal{H}$ ($P(\mathcal{H})$) +and some data $\mathcal{D}$, +we can find the posterior probability of the hypothesis ($P(\mathcal{H|D})$) +by accounting for the probability of observing that datum were the hypothesis true ($P(\mathcal{D|H})$, +also known as the *likelihood*):^[normalized by the marginal probability of observing the data, $P(\mathcal{D})$, which we will soon see is often not needed.] $$ -P(M|D) = \frac{P(D|M)\times P(M)}{P(D)} +P(\mathcal{H|D}) = \frac{P(\mathcal{D|H})\times P(\mathcal{H})}{P(\mathcal{D})} $$ -Using this equation, we can compare the probability-odds of two models: +Within this context, an hypothesis is formalized through the specification of an a-priori model: +Priors on the parameters ($\Theta$) that define the data generating process. + +If we have two hypothesis, we can find their posterior probability-odds as such: $$ -\underbrace{\frac{P(M_1|D)}{P(M_2|D)}}_{\text{Posterior Odds}} = -\underbrace{\frac{P(D|M_1)}{P(D|M_2)}}_{\text{Likelihood Ratio}} +\underbrace{\frac{P(\mathcal{H}_1|\mathcal{D})}{P(\mathcal{H}_2|\mathcal{D})}}_{\text{Posterior Odds}} = +\underbrace{\frac{P(\mathcal{D}|\mathcal{H}_1)}{P(\mathcal{D}|\mathcal{H}_2)}}_{\text{Likelihood Ratio}} \times -\underbrace{\frac{P(M_1)}{P(M_2)}}_{\text{Prior Odds}} +\underbrace{\frac{P(\mathcal{H}_1)}{P(\mathcal{H}_2)}}_{\text{Prior Odds}} $$ -Where the *likelihood ratio* (the middle term) is the *Bayes factor* - it is the ***factor*** by which some **prior odds** have been updated after observing the data to **posterior odds**. +Where the *likelihood ratio* (the middle term) is the ***Bayes factor*** - +it is the ***factor*** by which some **prior odds** have been updated _after_ observing the data to **posterior odds**. +This value can also be thought is _predictive terms_ - how well has a formalized prior model predicted the observed data compared to another formalized prior model? -Thus, Bayes factors can be calculated in two ways: +Thus, Bayes factors can be calculated in two ways: {#bf-definitions} -- As a ratio quantifying **the relative probability of the observed data under each of the two models**. (In some contexts, these probabilities are also called *marginal likelihoods*.) +- As a ratio quantifying **the relative probability of the observed data under each of the two hypotheses**: (In some contexts, these probabilities are also called *marginal likelihoods*.) $$ -BF_{12}=\frac{P(D|M_1)}{P(D|M_2)} +BF_{12}=\frac{P(\mathcal{D}|\mathcal{H}_1)}{P(\mathcal{D}|\mathcal{H}_2)} $$ - As **the degree of shift in prior beliefs** about the relative credibility of -two models (since they can be computed by dividing posterior odds by prior +two hypotheses (since they can be computed by dividing posterior odds by prior odds). $$ -BF_{12}=\frac{Posterior~Odds_{12}}{Prior~Odds_{12}} +BF_{12}=\frac{\text{Posterior Odds}_{12}}{\text{Prior Odds}_{12}} $$ -Here we provide functions for computing Bayes factors in two different contexts: -- **testing single parameters (coefficients) within a model** -- **comparing statistical models themselves** +`{bayestestR}` provides functions for computing Bayes factors in two different contexts: + +- **Comparing statistical models that differ on their priors which represent two competing hypotheses** +- **Testing single estimates (parameters, coefficients, transformed parameters) within a given model** -# 1. Testing Models' Parameters with Bayes Factors {#bayesfactor_parameters} +# 1. Comparing Models using Bayes Factors {#bayesfactor_models} -A **Bayes factor for a single parameter** can be used to answer the question: +Let's take a look at the _kid IQ_ dataset from the `{rstanarm}` package. -> "Given the observed data, has the null hypothesis of an absence of an effect -become more or less credible?" +```{r} +data("kidiq", package = "rstanarm") +kidiq <- subset(kidiq, select = c(kid_score, mom_hs)) +kidiq <- transform(kidiq, mom_hs = factor(mom_hs, levels = 0:1, labels = c("no", "yes"))) -```{r deathsticks_fig, echo=FALSE, fig.cap="Bayesian analysis of the Students' (1908) Sleep data set.", fig.align='center', out.width="80%"} -knitr::include_graphics("https://github.com/easystats/bayestestR/raw/main/man/figures/deathsticks.jpg") +head(kidiq) ``` -Let's use the Students' (1908) Sleep data set (`data("sleep")`). The data comes -from a study in which participants were administered a drug and the researchers -assessed the extra hours of sleep that participants slept afterwards. We will -try answering the following research question using Bayes factors: - -> **Given the observed data, has the hypothesis that the drug (the effect of `group`) has no effect on the numbers of hours of extra sleep (variable `extra`) become more of less credible?** +We'll be trying to answer a simple question: +what is the mean difference in IQ scores between children whose mothers completed high-school and those whose mothers did not complete high school (as indicated by the `mom_hs` variable). -```{r sleep_boxplot, echo=FALSE} -library(ggplot2) +There are many hypothesis we might have about this difference. Let's start by examining: -ggplot(sleep, aes(x = group, y = extra, fill = group)) + - geom_boxplot() + - theme_classic() + - theme(legend.position = "none") -``` +- $\mathcal{H}_0$: There's no difference in IQ between the two groups. +- $\mathcal{H}_1$: The difference is probably around 20 point in favor of kids whose mothers completed high school. +- $\mathcal{H}_2$: A more conservative hypothesis that the difference, if it exists, is probably no more than about 5 point in either direction. -The **boxplot** suggests that the second group has a higher number of hours of -extra sleep. *By how much?* +Let's plot these: -Let's fit a simple [Bayesian linear model](https://easystats.github.io/bayestestR/articles/example1.html), with a -prior of $b_{group} \sim N(0, 3)$ (i.e. the prior follows a Gaussian/normal -distribution with $mean = 0$ and $SD = 3$), using -`rstanarm` package: +```{r, echo=FALSE} +p_prior0 <- ggplot() + + geom_vline(xintercept = 0, linetype = "dashed") + + geom_segment( + aes(x = 0, xend = 0, y = 0, yend = 1), + linewidth = 1, + color = "royalblue" + ) + + geom_point(aes(x = 0, y = 1), size = 3, color = "royalblue") + + theme(axis.text.y = element_blank()) + + expand_limits(y = 1.5, x = c(-50, 50)) + + labs( + x = "Difference", + y = NULL, + title = expression(H[0] ~ ":" ~ theta == 0) + ) + +p_prior1 <- ggplot() + + stat_function( + geom = "area", + fun = dnorm, + args = list(mean = 20, sd = 10), + xlim = c(-50, 50), + fill = "royalblue", + ) + + geom_vline(xintercept = 0, linetype = "dashed") + + theme(axis.text.y = element_blank()) + + expand_limits(y = 0.05) + + labs( + x = "Difference", + y = NULL, + title = expression(H[1] ~ ":" ~ theta %~% norma(20, 10^2)) + ) + +p_prior2 <- ggplot() + + stat_function( + geom = "area", + fun = dnorm, + args = list(mean = 0, sd = 5), + xlim = c(-50, 50), + fill = "royalblue", + ) + + geom_vline(xintercept = 0, linetype = "dashed") + + theme(axis.text.y = element_blank()) + + expand_limits(y = 0.05) + + labs( + x = "Difference", + y = NULL, + title = expression(H[2] ~ ":" ~ theta %~% norma(0, 5^2)) + ) + +plots(p_prior0, p_prior1, p_prior2, n_columns = 1) +``` + +We can build models with these different priors with `{brms}` or `{rstanarm}`:^[We will be using `{rstanarm}` throughout this vignette, but `bayestestR` also supports `{brms}`, `{blavaan}`, `{rstan}`, `{cmdstanr}`, `{BayesFactor}` and more.] + +In any case, note the we will always require _many_ posterior samples for the stability of our BF estimation (typically 10 times more than what we would need for posterior estimation alone; @gronau2020bridgesampling). -```{r rstanarm_model, eval = FALSE} -set.seed(123) +```{r} library(rstanarm) -model <- stan_glm( - formula = extra ~ group, - data = sleep, - prior = normal(0, 3, autoscale = FALSE), - chains = 10, iter = 5000, warmup = 1000 -) -``` +mod_H0 <- stan_glm( + kid_score ~ 1, + family = gaussian(), + data = kidiq, -```{r, echo=FALSE} -model <- stan_glm( - formula = extra ~ group, - data = sleep, - prior = normal(0, 3, autoscale = FALSE), + chains = 10, + iter = 5000, + warmup = 1000, refresh = 0, - chains = 4, iter = 2000, warmup = 1000 # to reduce time for build + # required for BF computation + diagnostic_file = file.path(tempdir(), "df0.csv") ) -``` -### Testing against a null-*region* +mod_H1 <- stan_glm( + kid_score ~ mom_hs, + family = gaussian(), + data = kidiq, -One way of operationalizing the null-hypothesis is by setting a null region, such -that an effect that falls within this interval would be *practically* equivalent -to the null [@kruschke2010believe]. In our case, that means defining a range of -effects we would consider equal to the drug having no effect at all. We can then -compute the prior probability of the drug's effect falling *outside this null-region*, -and the prior probability of the drug's effect falling *within the null-region* -to get our *prior odds*. Say any effect smaller than an hour of extra sleep is -practically equivalent to being no effect at all, we would define our prior odds -as: + prior = normal(location = 20, scale = 10), -$$ -\frac -{P(b_{drug} \notin [-1, 1])} -{P(b_{drug} \in [-1, 1])} -$$ + chains = 10, + iter = 5000, + warmup = 1000, + refresh = 0, + diagnostic_file = file.path(tempdir(), "df1.csv") +) -Given our prior has a normal distribution centered at 0 hours with a scale (an -SD) of 3 hours, our priors would look like this: +mod_H2 <- stan_glm( + kid_score ~ mom_hs, + family = gaussian(), + data = kidiq, -```{r, echo=FALSE} -null <- c(-1, 1) -xrange <- c(-10, 10) - -x_vals <- seq(xrange[1], xrange[2], length.out = 400) -d_vals <- dnorm(x_vals, sd = 3) -in_null <- null[1] < x_vals & x_vals < null[2] -range_groups <- rep(0, length(x_vals)) -range_groups[!in_null & x_vals < 0] <- -1 -range_groups[!in_null & x_vals > 0] <- 1 - -ggplot(mapping = aes(x_vals, d_vals, fill = in_null, group = range_groups)) + - geom_area(color = "black", linewidth = 1) + - scale_fill_flat(name = "", labels = c("Alternative", "Null")) + - labs(x = "Drug effect", y = "Density") + - coord_cartesian(ylim = c(0, 0.45)) + - theme_modern() + - theme(legend.position.inside = c(0.2, 0.8)) - -pnull <- diff(pnorm(null, sd = 3)) -prior_odds <- (1 - pnull) / pnull + prior = normal(location = 0, scale = 5), + + chains = 10, + iter = 5000, + warmup = 1000, + refresh = 0, + diagnostic_file = file.path(tempdir(), "df2.csv") +) ``` -and the prior odds would be 2.8. +We can now ask: which a-priori model (each representing a different hypothesis) is more likely to have produced the observed data? -By looking at the posterior distribution, we can now compute the posterior probability of the drug's effect falling *outside the null-region*, and the posterior probability of the drug's effect falling *within the null-region* to get our *posterior odds*: +This is usually done by comparing the marginal likelihoods of two models. In +such a case, the Bayes factor is a measure of the **relative** evidence for one +hypothesis over the other. -$$ -\frac -{P(b_{drug} \notin [-1,1] | Data)} -{P(b_{drug} \in [-1,1] | Data)} -$$ +```{r} +bfs <- bayesfactor_models(mod_H1, mod_H2, denominator = mod_H0, verbose = FALSE) -```{r rstanarm_fit, echo=FALSE} -library(bayestestR) -model_prior <- unupdate(model) -posterior <- insight::get_parameters(model)$group2 -prior <- insight::get_parameters(model_prior)$group2 +print(bfs, show_names = TRUE) +``` -f_post <- logspline::logspline(posterior) -d_vals_post <- logspline::dlogspline(x_vals, f_post) +We can see that both models that allow for a difference between the groups +are much more supported by the data - +with $BF>`r insight::format_value(exp(bfs$log_BF[2]))`$ - +compared to the null (intercept only). -ggplot(mapping = aes(x_vals, d_vals_post, fill = in_null, group = range_groups)) + - geom_area(color = "black", linewidth = 1) + - scale_fill_flat(name = "", labels = c("Alternative", "Null")) + - labs(x = "Drug effect", y = "Density") + - coord_cartesian(ylim = c(0, 0.45)) + - theme_modern() + - theme(legend.position.inside = c(0.2, 0.8)) +Note that **interpretation guides** for Bayes factors can be found in the `effectsize` package: -My_first_BF <- bayesfactor_parameters(model, null = c(-1, 1), prior = model_prior) +```{r} +effectsize::interpret_bf(bfs$log_BF[1:2], log = TRUE) +``` -BF <- as.numeric(My_first_BF)[2] -post_odds <- prior_odds * BF +Due to the transitive property of Bayes factors, +we can easily change the reference model to the model representing $\mathcal{H}_2$: -med_post <- point_estimate(posterior)$Median -``` +```{r update_models1} +bfs2 <- update(bfs, reference = 2, subset = 1) -We can see that the center of the posterior distribution has shifted away from 0 -(to ~1.5). Likewise, the posterior odds are 2.5, which seems to favor **the effect being non-null**. **But**, does this mean the data support the alternative over -the null? Hard to say, since even before the data were observed, the priors -already favored the alternative - so we need to take our priors into account -here! - -Let's compute the Bayes factor as the change from the prior odds to the -posterior odds: $BF_{10} = Odds_{posterior} / Odds_{prior} = 0.9$! This BF -indicates that the data provide 1/0.9 = 1.1 times more evidence for the effect -of the drug being practically nothing than it does for the drug having some -clinically significant effect. Thus, although the center of distribution has -shifted away from 0, and the posterior distribution seems to favor a non-null -effect of the drug, it seems that given the observed data, the probability mass -has *overall* shifted closer to the null interval, making the values in the null -interval more probable! [see *Non-overlapping Hypotheses* in -@morey2011bayesinterval] - -All of this can be achieved with the function `bayesfactor_parameters()`, which -computes a Bayes factor for each of the model's parameters: - -```{r, eval=FALSE} -My_first_BF <- bayesfactor_parameters(model, null = c(-1, 1)) -My_first_BF +print(bfs2, show_names = TRUE) ``` -```{r, echo=FALSE} -print(My_first_BF) -``` +The data supports the a-priori model that suggests a positive difference almost 4 times over the model that suggests a small difference. -We can also plot using the `see` package: +We can also get a matrix of Bayes factors of all the pairwise model comparisons: ```{r} -library(see) -plot(My_first_BF) +print(as.matrix(bfs), show_names = TRUE) ``` -Note that **interpretation guides** for Bayes factors can be found in the `effectsize` package: +Overall, we can see that both models that allow for some non-0 difference are much more supported by the data compared to the 0-difference model. Let's take a look at the data: -```{r} -effectsize::interpret_bf(exp(My_first_BF$log_BF[2]), include_value = TRUE) +```{r, echo=FALSE} +ggplot(kidiq, aes(mom_hs, kid_score, fill = mom_hs, color = mom_hs)) + + geom_violindot() + + geom_boxplot(fill = NA, position = position_nudge(-0.2), width = 0.1) + + labs(x = "Mom completed high-school?", y = "Kids' IQ") + + guides(fill = "none", color = "none") ``` - -### Testing against the *point*-null (0) - -> **What if we don't know what region would be practically equivalent to 0?** - -Or if we just want the null to be exactly zero? Not a problem - as the width of -null region shrinks to a point, the change from the prior probability to the -posterior probability of the null can be estimated by comparing the density of -the null value between the two distributions.^[Note that as the width of null -interval shrinks to zero, the prior probability and posterior probability of the -alternative tends towards 1.00.] This ratio is called the **Savage-Dickey ratio**, -and has the added benefit of also being an approximation of a Bayes factor -comparing the estimated model against a model in which the parameter of interest -has been restricted to a point-null: - -> "[...] the Bayes factor for $H_0$ versus $H_1$ could be obtained by -analytically integrating out the model parameter $\theta$. However, the Bayes -factor may likewise be obtained by only considering $H_1$, and dividing the -height of the posterior for $\theta$ by the height of the prior for $\theta$, at -the point of interest." [@wagenmakers2010bayesian] - -```{r, eval=FALSE} -My_second_BF <- bayesfactor_parameters(model, null = 0) -My_second_BF -``` +And indeed both models 1 and 2's posteriors reflect this difference: ```{r, echo=FALSE} -My_second_BF <- bayesfactor_parameters( - data.frame(group2 = posterior), - data.frame(group2 = prior), - null = 0, - verbose = FALSE +plots( + plot(hdi(mod_H1)) + + labs(y = NULL, title = "Model 1") + + coord_cartesian(xlim = c(-20, 20)) + + scale_y_discrete(expand = expansion(0.1, 0)) + + guides(fill = "none"), + plot(hdi(mod_H2)) + + coord_cartesian(xlim = c(-20, 20)) + + scale_y_discrete(expand = expansion(0.1, 0)) + + labs(y = NULL, title = "Model 2"), + + n_columns = 1, + guides = "collect" ) - -print(My_second_BF) ``` -```{r} -plot(My_second_BF) -``` +Note that these posterior distributions are _very_ similar, +but BFs do not compare posterior models - only _a-priori_ models! -### Directional hypotheses +For this reason, computing BFs only makes sense if we are able to formulate our hypotheses +into distinct priors. -We can also compute Bayes factors for directional hypotheses ("one sided"), if we have a prior hypotheses about the direction of the effect. This can be done by setting an *order restriction* on the prior distribution (which results in an order restriction on the posterior distribution) of the alternative [@morey2014simple]. For example, if we have a prior hypothesis that *the drug has a positive effect on the number of sleep hours*, the alternative will be restricted to the region to the right of the null (point or interval): +## The BIC approximation -```{r savagedickey_one_sided, eval=FALSE} -test_group2_right <- bayesfactor_parameters(model, direction = ">") -test_group2_right -``` +It is also possible to compute *approximate* Bayes factors for the comparison of *frequentist* models (😱). +This is done by comparing BIC indices, allowing a Bayesian comparison +of nested as well as non-nested frequentist models [@wagenmakers2007practical]. -```{r prior_n_post_plot_one_sided, echo=FALSE} -test_group2_right <- bayesfactor_parameters( - data.frame(group2 = posterior), - data.frame(group2 = prior), - null = 0, - direction = ">", - verbose = FALSE -) +Since frequentist modeling does not allow for specification of priors, we are limited to either restricting parameters to 0 or not. -BF <- as.numeric(test_group2_right) +```{r} +mod_H0f <- lm(kid_score ~ 1, data = kidiq) -print(test_group2_right) -``` +mod_H1f <- lm(kid_score ~ mom_hs, data = kidiq) -```{r} -plot(test_group2_right) +bayesfactor_models(mod_H1f, denominator = mod_H0f) ``` -As we can see, given that we have an *a priori* assumption about the direction of the effect (that the effect is positive), **the presence of an effect is 2.8 times more likely than the absence of an effect**, given the observed data (or that the data are 2.8 time more probable under $H_1$ than $H_0$). This indicates that, given the observed data, and a priori hypothesis, the posterior mass has shifted away from the null value, giving some evidence against the null (note that a Bayes factor of 2.8 is still considered quite [weak evidence](https://easystats.github.io/effectsize/reference/interpret_bf.html)). +(Note how similar this approximate BF is to the proper BFs estimated above.) -Thanks to the flexibility of Bayesian framework, it is also possible to compute -a Bayes factor for **dividing** hypotheses - that is, for a null and alternative -that are *complementary*, opposing one-sided hypotheses [@morey2014simple]. -For example, above we compared an alternative of $H_A$: *the drug has a positive effects* to the null $H_0$: *the drug has no effect*. But we can also compare instead the same alternative to its *complementary* hypothesis: $H_{-A}$: *the drug has a negative effects*. +## Model averaging -```{r inteval_div, eval=FALSE} -test_group2_dividing <- bayesfactor_parameters(model, null = c(-Inf, 0)) -test_group2_dividing -``` - -```{r inteval_div2, echo=FALSE} -test_group2_dividing <- bayesfactor_parameters( - data.frame(group2 = posterior), - data.frame(group2 = prior), - null = c(-Inf, 0) -) - -print(test_group2_dividing) -``` +In the previous section, we discussed the direct comparison of two models to +determine if a hypothesis is supported by the data. +However, in many cases there are too many models to consider, +or perhaps it is not straightforward which models we should be comparing to determine if an effect is supported by the data. +For such cases, we can use Bayesian model averaging (BMA) to determine the support +provided by the data for a parameter or model-term across many models. -```{r} -plot(test_group2_dividing) -``` +### Inclusion Bayes factors {#bayesfactor_inclusion} -We can see that this test produces even stronger (more conclusive) evidence than the one-sided vs. point-null test! And indeed, as a rule of thumb, the more specific the two hypotheses are, and the more distinct they are from one another, the more *power* our Bayes factor has! ^[For more, see [this talk by Richard D. Morey, minute 48](https://philstatwars.files.wordpress.com/2020/09/richard_presentation.mp4)] +Inclusion Bayes factors answer the question: -Thanks to the transitivity of Bayes factors, we can also use -`bayesfactor_parameters()` to compare even more types of hypotheses, with some -trickery. For example: +> **Are the observed data more probable under models with a particular predictor, than they are under models without that particular predictor?** -$$ -\underbrace{BF_{0 **Upon observing the data, the credibility of which of the parameter’s values has increased (or decreased)?** +We can see that across the 3 models under consideration, models _with_ the `mom_hs` term fit the data `r insight::format_value(exp(bfinc$log_BF))` times more than the model _without_ that term. -For example, we've seen that the point null has become somewhat less credible -after observing the data, but we might also ask which values have **gained** -credibility given the observed data?. The resulting range of values is called -**the support interval** as it indicates which values are supported by the data -[@wagenmakers2018SI]. We can do this by once again comparing the prior and -posterior distributions and checking where the posterior densities are higher -than the prior densities. +### Averaging posteriors {#weighted_posteriors} -In `bayestestR`, this can be achieved with the `si()` function: +Similar to how we can average evidence for a predictor across models, we can +also average the **posterior estimate** across models. ```{r} -my_first_si <- si(model, BF = 1, verbose = FALSE) +ppp <- weighted_posteriors(mod_H0, mod_H1, mod_H2) -print(my_first_si) +plot(hdi(ppp$mom_hsyes)) + + coord_cartesian(xlim = c(-20, 20)) ``` -The argument `BF = 1` indicates that we want the interval to contain values that -have gained support by a factor of at least 1 (that is, any support at all). - -Visually, we can see that the credibility of all the values within this interval -has increased (and likewise the credibility of all the values outside this -interval has decreased): +This looks a lot like the posterior obtained from the second model, +which shouldn't be surprising since about 80% of the averaged posterior comes from the second model. ```{r} -plot(my_first_si) +attr(ppp, "weights") ``` -We can also see the this support interval (just barely) excludes the point null -(0) - whose credibility we've already seen has decreased by the observed data. -This emphasizes the relationship between the support interval and the Bayes -factor: - -> "The interpretation of such intervals would be analogous to how a frequentist -confidence interval contains all the parameter values that would not have been -rejected if tested at level $\alpha$. For instance, a BF = 1/3 support interval -encloses all values of theta for which the updating factor is not stronger than -3 against." [@wagenmakers2018SI] -Thus, the choice of BF (the level of support the interval should indicate) -depends on what we want our interval to represent: +## Order restricted models {#bayesfactor_restricted} -- A $BF = 1$ contains values whose credibility has merely not decreased by -observing the data. -- A $BF > 1$ contains values who received more impressive support from the data. -- A $BF < 1$ contains values whose credibility has *not* been impressively -decreased by observing the data. Testing against values outside this interval -will produce a Bayes factor larger than $1/BF$ in support of the alternative. - -# 2. Comparing Models using Bayes Factors {#bayesfactor_models} - -Bayes factors can also be used to compare statistical **models**. In this -statistical context, they answer the following question: +We've already seen we can formalize hypothesis into distributional priors +(e.g., _the difference is probably no more than about 5 point in either direction._ became $theta \sim Normal(0, 5^2)$). +These priors are **unrestricted** - that is, **all values** between $-\infty$ +and $\infty$ of all parameters in the model have some non-zero credibility (no +matter how small; this is true for both the prior and posterior distribution). -> **Under which model are the observed data more probable?** +But we can also formalize hypotheses as **order restrictions** [@morey_2015_blog; +@morey2011bayesinterval]. -In other words, which model is more likely to have produced the observed data? -This is usually done by comparing the marginal likelihoods of two models. In -such a case, the Bayes factor is a measure of the **relative** evidence for one -model over the other. +For example, we can impose an _additional_ order restriction +that the difference _must be positive_, which we can write like this (if we had to): -Let's use Bayes factors for model comparison to find a model that best describes -the length of an iris' sepal using the `iris` data set. +$$ +\mathcal{H}_{2r}: theta \sim Normal(0, 5^2)\begin{bmatrix} \infty \\ 0 \end{bmatrix} +$$ -### For Bayesian models (`brms` and `rstanarm`) +By testing the probabilities of these restrictions on prior and posterior samples, +we can see how the probabilities of the restricted distributions _change_ after observing the data - +[this change is a Bayes factor](#bf-definitions). +These can be achieved with `bayesfactor_restricted()`, that compute a Bayes +factor for these restricted model vs the unrestricted model. -**Note: In order to compute Bayes factors for Bayesian models, non-default arguments must be added upon fitting:** +```{r} +bayesfactor_restricted(mod_H2, hypothesis = "mom_hsyes > 0") +``` - - `brmsfit` models **must** have been fitted with `save_pars = save_pars(all = TRUE)` - - `stanreg` models **must** have been fitted with a defined `diagnostic_file`. -Let's first fit 5 Bayesian regressions with `brms` to predict `Sepal.Length`: +In other words, the data fits the restricted model (where the difference must be small _and positive_) twice as much as it fits the un-restircted model (where the difference must be small). -```{r brms_disp, eval = FALSE} -library(brms) +We can compare multiple restricted hypotheses. For example: that the difference isn't just positive, it's larger than 4. -# intercept only model -m0 <- brm(Sepal.Length ~ 1, - data = iris, - prior = - set_prior("student_t(3, 6, 6)", class = "Intercept") + - set_prior("student_t(3, 0, 6)", class = "sigma"), - chains = 10, iter = 5000, warmup = 1000, - save_pars = save_pars(all = TRUE) +```{r} +bf_rstr <- bayesfactor_restricted( + mod_H2, + hypothesis = c( + positive = "mom_hsyes > 0", + strong = "mom_hsyes > 4" + ) ) +``` -# Petal.Length only -m1 <- brm(Sepal.Length ~ Petal.Length, - data = iris, - prior = - set_prior("student_t(3, 6, 6)", class = "Intercept") + - set_prior("student_t(3, 0, 6)", class = "sigma") + - set_prior("normal(0, 1)", coef = "Petal.Length"), - chains = 10, iter = 5000, warmup = 1000, - save_pars = save_pars(all = TRUE) -) +Here too we can obtain a matrix of BFs between all models: -# Species only -m2 <- brm(Sepal.Length ~ Species, - data = iris, - prior = - set_prior("student_t(3, 6, 6)", class = "Intercept") + - set_prior("student_t(3, 0, 6)", class = "sigma") + - set_prior("normal(0, 3)", coef = c("Speciesversicolor", "Speciesvirginica")), - chains = 10, iter = 5000, warmup = 1000, - save_pars = save_pars(all = TRUE) -) +```{r} +print(as.matrix(bf_rstr), show_names = TRUE) +``` -# Species + Petal.Length model -m3 <- brm(Sepal.Length ~ Species + Petal.Length, - data = iris, - prior = - set_prior("student_t(3, 6, 6)", class = "Intercept") + - set_prior("student_t(3, 0, 6)", class = "sigma") + - set_prior("normal(0, 1)", coef = "Petal.Length") + - set_prior("normal(0, 3)", coef = c("Speciesversicolor", "Speciesvirginica")), - chains = 10, iter = 5000, warmup = 1000, - save_pars = save_pars(all = TRUE) -) +We can see the "strong" model is preferred over both the un-restricted model and the "positive" model. -# full interactive model -m4 <- brm(Sepal.Length ~ Species * Petal.Length, - data = iris, - prior = - set_prior("student_t(3, 6, 6)", class = "Intercept") + - set_prior("student_t(3, 0, 6)", class = "sigma") + - set_prior("normal(0, 1)", coef = "Petal.Length") + - set_prior("normal(0, 3)", coef = c("Speciesversicolor", "Speciesvirginica")) + - set_prior("normal(0, 2)", coef = c("Speciesversicolor:Petal.Length", "Speciesvirginica:Petal.Length")), - chains = 10, iter = 5000, warmup = 1000, - save_pars = save_pars(all = TRUE) -) -``` +Again, we can use the transitive properties of Bayes factors to find the BF comparing $\mathcal{H}_{2r}$ and $\mathcal{H}_0$: -We can now compare these models with the `bayesfactor_models()` function, using -the `denominator` argument to specify the model against which the rest of the -models will be compared (in this case, the intercept-only model): +$$ +\begin{align} +BF_{2r,0} & = BF_{2,0} \times BF_{2r,2} \\ +& = \frac{P(\mathcal{D}|\mathcal{H}_{2})}{P(\mathcal{D}|\mathcal{H}_0)} +\times \frac{P(\mathcal{D}|\mathcal{H}_{2r})}{P(\mathcal{D}|\mathcal{H}_2)} \\ +& = \frac{P(\mathcal{D}|\mathcal{H}_{2r})}{P(\mathcal{D}|\mathcal{H}_0)} +\end{align} +$$ -```{r brms_models_disp, eval = FALSE} -library(bayestestR) +```{r} +BF_2.0 <- as.numeric(bfs)[2] +BF_2r.2 <- as.numeric(bf_rstr)[2] -comparison <- bayesfactor_models(m1, m2, m3, m4, denominator = m0) -comparison +(BF_2r.0 <- BF_2.0 * BF_2r.2) ``` -```{r, echo = FALSE} -comparison <- structure( - list( - Model = c("Petal.Length", "Species", "Species + Petal.Length", "Species * Petal.Length", "1"), - log_BF = c(101.556419030653, 64.2903334815192, 122.864721399001, 119.712908243647, 0) - ), - class = c("bayesfactor_models", "see_bayesfactor_models", "data.frame"), - row.names = c("m1", "m2", "m3", "m4", "m0"), - denominator = 5L, - BF_method = "marginal likelihoods (bridgesampling)", - unsupported_models = FALSE -) -print(comparison) -``` +So the data support the hypothesis that the difference is small but strictly positive +`r insight::format_value(BF_2r.0)` times more than the hypothesis that the difference is exactly 0. +**Because these restrictions are on the prior distribution, they are only appropriate for testing pre-planned (*a priori*) hypotheses, and should not be used for any post hoc comparisons [@morey_2015_blog].** -We can see that the `Species + Petal.Length` model is the best model - with $BF=2\times 10^{53}$ compared to the null (intercept only). +--- -Due to the transitive property of Bayes factors, we can easily change the reference model to the full `Species * Petal.Length` model: +We are not limited to a single order restrictions - we can compound them to create complex restrictions. -```{r update_models1} -update(comparison, reference = 4) -``` +Let's look at the [`disgust` dataset](http://bayesfactor.blogspot.com/2015/01/multiple-comparisons-with-bayesfactor-2.html), were 150 individuals rated "moral harshness" of undocumented migrants in one of three conditions: no odor, clean odor (lemon), or disgusting (sulfur) odor during questionnaire. -As we can see, the `Species + Petal.Length` model is also favored compared to the `Species * Petal.Length` model, though to several orders of magnitude less - is is only supported 23.38 times more!) +```{r} +data("disgust", package = "bayestestR") -We can also change the reference model to the `Species` model: -```{r update_models2} -update(comparison, reference = 2) +str(disgust) ``` -Notice that, in the Bayesian framework the compared models *do not* need to be -nested models, as happened here when we compared the `Petal.Length`-only model -to the `Species`-only model (something that cannot be done in the frequentist -framework, where compared models must be nested in one another). -We can also get a matrix of Bayes factors of all the pairwise model comparisons: +Let's build our simple one-way-ANOVA-like model: ```{r} -as.matrix(comparison) -``` - -**NOTE:** In order to correctly and precisely estimate Bayes Factors, you always need the 4 P's: **P**roper **P**riors ^[[Robert, 2016](https://doi.org/10.1016/j.jmp.2015.08.002); [Kass & Raftery, 1993](https://doi.org/10.1080/01621459.1995.10476572); [Fernández, Ley, & Steel, 2001](https://doi.org/10.1016/S0304-4076(00)00076-2)], and a **P**lentiful **P**osterior ^[[Gronau, Singmann, & Wagenmakers, 2017](https://arxiv.org/abs/1710.08162)]. - -### For Frequentist models via the BIC approximation +mod_odor <- stan_glm( + score ~ condition, + family = gaussian(), + data = disgust, -It is also possible to compute Bayes factors for the comparison of frequentist -models. This is done by comparing BIC measures, allowing a Bayesian comparison -of nested as well as non-nested frequentist models [@wagenmakers2007practical]. + prior = normal(location = 0, scale = 2), -Let's try it out on some **linear mixed-effects models**: + contrasts = list(condition = "contr.equalprior_pairs"), -```{r lme4_models} -library(lme4) + chains = 10, + iter = 5000, + warmup = 1000, + refresh = 0, + diagnostic_file = file.path(tempdir(), "df3.csv") +) +``` -# define models with increasing complexity -m0 <- lmer(Sepal.Length ~ (1 | Species), data = iris) -m1 <- lmer(Sepal.Length ~ Petal.Length + (1 | Species), data = iris) -m2 <- lmer(Sepal.Length ~ Petal.Length + (Petal.Length | Species), data = iris) -m3 <- lmer(Sepal.Length ~ Petal.Length + Petal.Width + (Petal.Length | Species), data = iris) -m4 <- lmer(Sepal.Length ~ Petal.Length * Petal.Width + (Petal.Length | Species), data = iris) +**NOTE**: See the *Specifying Correct Priors for Factors with More Than 2 Levels* appendix below for more details on the contrast coding used here. -# model comparison -bayesfactor_models(m1, m2, m3, m4, denominator = m0) -``` +Let's obtain the prior and posterior distributions of the condition means using `posterior_epred()`. -### Order restricted models {#bayesfactor_restricted} +```{r} +mod_odor.prior <- unupdate(mod_odor) # get the priors-only model -As stated above when discussing one-sided hypothesis tests, we can create new -models by imposing order restrictions on a given model. For example, consider -the following model, in which we predict the length of an iris' sepal from the -length of its petal, as well as from its species, with priors: -- $b_{petal} \sim N(0,2)$ -- $b_{versicolors}\ \&\ b_{virginica} \sim N(0,1.2)$ +library(emmeans) -```{r, eval=FALSE} -iris_model <- stan_glm(Sepal.Length ~ Species + Petal.Length, - data = iris, - prior = normal(0, c(2, 1.2, 1.2), autoscale = FALSE), - chains = 10, iter = 5000, warmup = 1000 -) +disgust_means <- emmeans(mod_odor, ~condition) +disgust_means.prior <- emmeans(mod_odor.prior, ~condition) ``` -```{r, echo=FALSE} -iris_model <- stan_glm(Sepal.Length ~ Species + Petal.Length, - data = iris, - prior = normal(0, c(2, 1.2, 1.2), autoscale = FALSE), - chains = 4, iter = 2000, warmup = 1000, # to reduce build time - refresh = 0 -) -``` +Our hypothesis is that the moral harshness ratings are lowest in the lemon condition, higher in the control condition, and highest in the sulfur condition - in other words, there is an _order_ of: $\text{lemon} < \text{control} < \text{sulfur}$. -These priors are **unrestricted** - that is, **all values** between $-\infty$ -and $\infty$ of all parameters in the model have some non-zero credibility (no -matter how small; this is true for both the prior and posterior distribution). -Subsequently, *a priori* the ordering of the parameters relating to the iris -species can have any ordering, such that *a priori* setosa can have larger -sepals than virginica, but it is also possible for virginica to have larger -sepals than setosa! - -Does it make sense to let our priors cover all of these possibilities? That -depends on our *prior* knowledge or hypotheses. For example, even a novice -botanist will assume that it is unlikely that petal length will be *negatively* -associated with sepal length - an iris with longer petals is likely larger, and -thus will also have a longer sepal. And an expert botanist will perhaps assume -that setosas have smaller sepals than both versicolors and virginica. - -These priors can be formulated as **restricted** priors [@morey_2015_blog; -@morey2011bayesinterval]: - -1. The novice botanist: $b_{petal} > 0$ -2. The expert botanist: $b_{versicolors} > 0\ \&\ b_{virginica} > 0$ - -By testing these restrictions on prior and posterior samples, we can see how the -probabilities of the restricted distributions change after observing the data. -This can be achieved with `bayesfactor_restricted()`, that compute a Bayes -factor for these restricted model vs the unrestricted model. Let's first specify -these restrictions as logical conditions: +We can formalize this hypothesis as an order restriction on the means of the three conditions: ```{r} -botanist_hypotheses <- c( - "Petal.Length > 0", - "(Speciesversicolor > 0) & (Speciesvirginica > 0)" +bayesfactor_restricted( + posterior = disgust_means, + prior = disgust_means.prior, + hypothesis = "lemon < control & control < sulfur" ) ``` -Let's test these hypotheses: - -```{r} -model_prior <- unupdate(iris_model) - -botanist_BFs <- bayesfactor_restricted( - posterior = iris_model, - prior = model_prior, - hypothesis = botanist_hypotheses -) - -print(botanist_BFs) -``` +We can see that a-priori, this specific ordering of the 3 means has a proability of $\frac{1}{6}$ (1 of 6 possible orderings of 3 values), but after observing the data, this ordering is about ~4 times more likely than any other ordering. -We can see that the novice botanist's hypothesis gets a Bayes factor of ~2, -indicating the data provides twice as much evidence for a model in which petal -length is restricted to be positively associated with sepal length than for a -model with not such restriction. - -What about our expert botanist? He seems to have failed miserably, with a BF -favoring the *unrestricted* model many many times over. How is -this possible? It seems that when *controlling for petal length*, versicolor and -virginica actually have shorter sepals! - -```{r plot_iris, echo=FALSE} -ggplot(iris, aes(Petal.Length, Sepal.Length, color = Species)) + - geom_point() + - scale_color_flat() + - theme(legend.position.inside = c(0.2, 0.8)) -``` +The transitive properties of Bayes factors can also be used to compute a Bayes factor for **dividing** hypotheses - +that is for two *complementary* opposing one-sided hypotheses [@morey2014simple]. -Note that these Bayes factors compare the restricted model to the unrestricted -model. If we wanted to compare the restricted model to the null model, we could -use the transitive property of Bayes factors like so: +For example, above we compared $\mathcal{H}_{+}: \theta > 0$ - *the difference is positive* +to the null $\mathcal{H}_{-}: \theta < 0$: *the difference is negative*: $$ -BF_{\text{restricted vs. NULL}} = \frac -{BF_{\text{restricted vs. un-restricted}}} -{BF_{\text{un-restricted vs NULL}}} +\begin{align} +BF_{+,-} & = BF_{+,0} \times BF_{0,-} \\ +& = \frac{P(\mathcal{D}|\mathcal{H}_{+})}{P(\mathcal{D}|\mathcal{H}_0)} +\times \frac{P(\mathcal{D}|\mathcal{H}_{0})}{P(\mathcal{D}|\mathcal{H}_-)} \\ +& = \frac{P(\mathcal{D}|\mathcal{H}_{+})}{P(\mathcal{D}|\mathcal{H}_{-})} +\end{align} $$ -**Because these restrictions are on the prior distribution, they are only appropriate for testing pre-planned (*a priori*) hypotheses, and should not be used for any post hoc comparisons [@morey_2015_blog].** - -**NOTE**: See the *Specifying Correct Priors for Factors with More Than 2 Levels* appendix below. -# 3. Bayesian Model Averaging - -In the previous section, we discussed the direct comparison of two models to -determine if an effect is supported by the data. However, in many cases there -are too many models to consider, or perhaps it is not straightforward which -models we should compare to determine if an effect is supported by the data. For -such cases, we can use Bayesian model averaging (BMA) to determine the support -provided by the data for a parameter or term across many models. +```{R} +bf_div <- bayesfactor_restricted( + posterior = disgust_means, + prior = disgust_means.prior, + hypothesis = c( + positive = "lemon - sulfur > 0", + negative = "lemon - sulfur < 0" + ) +) -### Inclusion Bayes factors {#bayesfactor_inclusion} +print(as.matrix(bf_div), show_names = TRUE) +``` -Inclusion Bayes factors answer the question: -> **Are the observed data more probable under models with a particular predictor, than they are under models without that particular predictor?** +The hypothesis that the lemon condition yields lower ratings than the sulfur condition +is about 60 times more supported by the data than +the hypothesis that the lemon condition has higher ratings than the sulfur condition. -In other words, on average, are models with predictor $X$ more likely to have -produced the observed data than models without predictor $X$?^[A model without -predictor $X$ can be thought of as a model in which the parameter(s) of the -predictor have been restricted to a null-point of 0.] +Etc... etc... we can compound as many restrictions as we want, and compare them to each other, or to the unrestricted model, or to the null model, etc. -Since each model has a prior probability, it is possible to sum the prior -probability of all models that include a predictor of interest (the *prior inclusion probability*), and of all models that do not include that predictor -(the *prior exclusion probability*). After the data are observed, and each model -is assigned a posterior probability, we can similarly consider the sums of the -posterior models' probabilities to obtain the *posterior inclusion probability* -and the *posterior exclusion probability*. Once again, the change from prior -inclusion odds to the posterior inclusion odds is the **Inclusion Bayes factor** -["$BF_{Inclusion}$"; @clyde2011bayesian]. +--- -Lets use the `brms` example from above: +Overall, Bayes factors are a powerful tool for comparing the relative evidence of two _formalized_ hypotheses (i.e., hypotheses that have been formalized into distinct priors). -```{r inclusion_brms} -bayesfactor_inclusion(comparison) -``` +Note that Bayes factors are _not_ a tool for comparing ***posterior*** models +(for such comparisons, see the [`{loo}` package](https://mc-stan.org/loo/)) - +and in fact two similar posterior models can have very different BFs if their priors are different. -If we examine the interaction term's inclusion Bayes factor, we can see that across all 5 models, a model with the term is *on average* (1/0.171) 5.84 times less supported than a model without the term. -Note that `Species`, a factor represented in the model with several parameters, gets a *single* Bayes factor - inclusion Bayes factors are given **per predictor**! -We can also compare only matched models - such that averaging is done only -across models that (1) do not include any interactions with the predictor of -interest; (2) for interaction predictors, averaging is done only across models -that contain the main effects from which the interaction predictor is comprised -(see explanation for why you might want to do this -[here](https://www.cogsci.nl/blog/interpreting-bayesian-repeated-measures-in-jasp)). +# 2. Testing Models' Parameters with Bayes Factors {#bayesfactor_parameters} -```{r inclusion_brms2} -bayesfactor_inclusion(comparison, match_models = TRUE) -``` +For testing a point null hypothesis (e.g., $\mathcal{H}_0: \theta = 0$) against some alternative non-null hypothesis (e.g., $\mathcal{H}_1: \theta \sim Normal(0, 5^2)$), a nice "short cut" can be used to obtain a Bayes factor - via the Savage-Dickey density ratio [@wagenmakers2010bayesian]. -### Comparison with JASP +If we zoomed-in on the null value $\theta_0$ - what does it mean for the null's credability to have become _lower_ in the posterior distribution? +Well, since the null is less credible, that necessarily means that the alternative is _more_ credible by the same amount! -`bayesfactor_inclusion()` is meant to provide Bayes Factors per predictor, -similar to JASP's *Effects* option. +Note that for a point null on a continuous parameter, the _probability_ of the null is always 0, and the probability of all other values is 1. However, the _density_ of the null can be non-zero, and it is this density that quantifies the credibility of the null hypothesis. This means that the prior _odds_ of the null vs the alternative are: -Let's compare the two. -Note that for this comparison we will use the `{BayesFactor}` package, which is what _JASP_ uses under the hood. -(Note that this package used different model-parameterization and different default prior-specifications compared to _Stan_-based packages.) +$$ +\text{Prior Odds} = \frac{P(\theta \neq \theta_0)}{P(\theta=\theta_0)} = \frac{1}{P(\theta=\theta_0)} +$$ -1. **Across all models**: +Likewise, the posterior odds of the null vs the alternative are: -```{r JASP_all} -library(BayesFactor) -data(ToothGrowth) -ToothGrowth$dose <- as.factor(ToothGrowth$dose) +$$ +\text{Posterior Odds} = \frac{P(\theta \neq \theta_0 \mid \mathcal{D})}{P(\theta=\theta_0 \mid \mathcal{D})} + = \frac{1}{P(\theta=\theta_0 \mid \mathcal{D})} \\ +$$ -BF_ToothGrowth <- anovaBF(len ~ dose * supp, ToothGrowth, progress = FALSE) +Recall that a Bayes factor can be thought of as the degree of shift in the relative credibility of two hypotheses from the prior model to the posterior model: -bayesfactor_inclusion(BF_ToothGrowth) -``` +$$ +\begin{align} +BF_{10} & = \frac{\text{Posterior Odds}_{10}}{\text{Prior Odds}_{10}} += \frac{\frac{1}{P(\theta=\theta_0 \mid \mathcal{D})}}{\frac{1}{P(\theta=\theta_0)}} = \\ +& = \frac{P(\theta=\theta_0)}{P(\theta=\theta_0 \mid \mathcal{D})} +\end{align} +$$ -```{r JASP_all_fig, echo=FALSE} -knitr::include_graphics("https://github.com/easystats/bayestestR/raw/main/man/figures/JASP1.jpg") -``` +In other words, it is sufficient to compare the density of the null under the prior distribution ($P(\theta=\theta_0)$) with the density of the null under posterior distribution ($P(\theta=\theta_0 \mid \mathcal{D})$) to obtain a Bayes factor comparing the null and alternative hypotheses - the degree to which the null has become more or less credible after observing the data. -2. **Across matched models**: +This can be done using the `bayesfactor_parameters()` - let's use it to test the null hypothesis that the difference in IQ between the two groups is exactly 0: -```{r JASP_matched} -bayesfactor_inclusion(BF_ToothGrowth, match_models = TRUE) +```{R} +(sddr <- bayesfactor_parameters(mod_H2, null = 0)) ``` +Looking at the Savage-Dickey density ratio for the `mom_hsyes` parameter, +we can see that the null has become substantially less credible after observing the data - +and therefore the alternative has become _more_ credible. -```{r JASP_matched_fig, echo=FALSE} -knitr::include_graphics("https://github.com/easystats/bayestestR/raw/main/man/figures/JASP2.jpg") +```{R} +plot(sddr) ``` -3. **With Nuisance Effects**: +We can see that the center of the posterior distribution has shifted away from 0 (to around 10), and the density at 0 has become much smaller in the posterior distribution compared to the prior distribution suggesting that the data is less compatible with the null value of 0 that with other values overall. -We'll add `dose` to the null model in JASP, and do the same in `R`: +Compare the Savage-Dickey density ratio for the `mom_hsyes` parameter +with the Bayes factor comparing `mod_H2` (the alternative) and `mod_H0` (the null): -```{r JASP_Nuisance} -BF_ToothGrowth_against_dose <- BF_ToothGrowth[3:4] / BF_ToothGrowth[2] # OR: -# update(bayesfactor_models(BF_ToothGrowth), -# subset = c(4, 5), -# reference = 3) -BF_ToothGrowth_against_dose +```{R} +print(update(bfs, subset = 2), show_names = TRUE) +``` +Not perfect, but a good approximation. -bayesfactor_inclusion(BF_ToothGrowth_against_dose) -``` +### Testing against a null-*region* -```{r JASP_Nuisance_fig, echo=FALSE} -knitr::include_graphics("https://github.com/easystats/bayestestR/raw/main/man/figures/JASP3.jpg") -``` +One way of operationalizing the null-hypothesis is by setting a null _region_, such +that an effect that falls within this interval would be *practically* equivalent +to the null [@kruschke2010believe]. In our case, that means defining a range of +effects we would consider equal to no difference in IQ between the two groups. +Let's say we consider any difference between -5 and 5 points to be practically equivalent to no difference at all, +we would define our null-region as $\mathcal{H}_0: \theta \in [-5, 5]$. -## Averaging posteriors {#weighted_posteriors} +The Bayes factor for this null-region can be obtained +by comparing the change in the _relative_ credibility of the null-region $\mathcal{H}_0: \theta \in [-5, 5]$ +and the non-null region $\mathcal{H}_1: \theta \notin [-5, 5]$ from the prior to the posterior distribution - +to achieve this, we combine the logic of the Savage-Dickey density ratio +with the logic of the order-restricted Bayes factor! -Similar to how we can average evidence for a predictor across models, we can -also average the **posterior estimate** across models. This is useful in -situations where Bayes factors seem to support a null effect, yet the *HDI* for -the alternative excludes the null value (also see `si()` described above). - -For example, looking at Motor *Trend Car Road Tests* (`data(mtcars)`), we would -naturally predict miles/gallon (`mpg`) from transition type (`am`) and weight -(`wt`), but what about number of carburetors (`carb`)? Is this a good predictor? - -We can determine this by comparing the following models: - -```{r, eval=FALSE} -mod <- stan_glm(mpg ~ wt + am, - data = mtcars, - prior = normal(0, c(10, 10), autoscale = FALSE), - chains = 10, iter = 5000, warmup = 1000, - diagnostic_file = file.path(tempdir(), "df1.csv"), - refresh = 0 -) +This too can be done with `bayesfactor_parameters()`, by specifying a null-region instead of a point null: -mod_carb <- stan_glm(mpg ~ wt + am + carb, - data = mtcars, - prior = normal(0, c(10, 10, 20), autoscale = FALSE), - chains = 10, iter = 5000, warmup = 1000, - diagnostic_file = file.path(tempdir(), "df0.csv"), - refresh = 0 -) +```{R} +(sddr_region <- bayesfactor_parameters(mod_H2, null = c(-5, 5))) -BF_carb <- bayesfactor_models(mod_carb, denominator = mod, verbose = FALSE) -BF_carb +plot(sddr_region) ``` -```{r, echo=FALSE} -mod <- stan_glm(mpg ~ wt + am, - data = mtcars, - prior = normal(0, c(10, 10), autoscale = FALSE), - chains = 4, iter = 2000, warmup = 1000, # to reduce build time - diagnostic_file = file.path(tempdir(), "df1.csv"), - refresh = 0 -) +We can see that the null-region has become much less credible by a factor of >100 after observing the data - +suggesting that data is more compatible with non-null values than with null values, +and therefore the alternative (that the difference is outside of the [-5, 5] range) has become relatively much more credible. -mod_carb <- stan_glm(mpg ~ wt + am + carb, - data = mtcars, - prior = normal(0, c(10, 10, 20), autoscale = FALSE), - chains = 4, iter = 2000, warmup = 1000, # to reduce build time - diagnostic_file = file.path(tempdir(), "df0.csv"), - refresh = 0 -) - -BF_carb <- bayesfactor_models(mod_carb, denominator = mod, verbose = FALSE) -BF_carb -``` +### Directional hypotheses +We can also compute Bayes factors for directional hypotheses ("one sided"), +if we have a prior hypotheses about the direction of the effect. This is similiar to the _dividing_ Bayes factor discussed above, but we are still comparing the (directional) alternative to the null (not between two directional hypotheses). +This too can be done by setting an *order restriction* on the prior and posterior distributions [@morey2014simple]. +For example, if we have a prior hypothesis that *the difference in IQ between the two groups is positive*, +the alternative will be restricted to the region to the right of the null (point or interval): -It seems that the model without `carb` as a predictor is $1/BF=1.2$ times more -likely than the model *with* `carb` as a predictor. We might then assume that in -the latter model, the `HDI` will include the point-null value of 0 effect, to also -indicate the credibility of the null in the posterior. However, this is not the -case: +```{R} +(sddr_directional <- bayesfactor_parameters(mod_H2, null = c(-5, 5), direction = "right")) -```{r} -hdi(mod_carb, ci = 0.95) +plot(sddr_directional) ``` -How can this be? By estimating the HDI of the effect for `carb` in the full -model, we are acting under the assumption that this model is correct. However, -as we've just seen, both models are practically tied. If this is the case **why limit our estimation of the effect just to one model?** [@van2019cautionary]. +As we can see, given that we have an *a priori* assumption about the direction of the difference, the evidence against the null is even stronger. Again, given this order restriction on the alternative hypothesis, the posterior mass has substantially shifted away and outside the null value, giving some extreme evidence against the null and in favor of the alternative. -Using Bayesian Model Averaging, we can combine the posteriors samples from -several models, weighted by the models' marginal likelihood (done via the -`bayesfactor_models()` function). If some parameter is part of some of the -models but is missing from others, it is assumed to be fixed a 0 (which can also -be seen as a method of applying shrinkage to our estimates). This results in a -posterior distribution across several models, which we can now treat like any -posterior distribution, and estimate the HDI. +### Support intervals and curves {#si} -In `bayestestR`, we can do this with the `weighted_posteriors()` function: +So far we've seen that Bayes factors quantify relative support between competing hypotheses. However, we can also ask: -```{r eval=FALSE} -BMA_draws <- weighted_posteriors(mod, mod_carb, verbose = FALSE) +> **Upon observing the data, the credibility of which of the parameter’s values has increased (or decreased)?** -BMA_hdi <- hdi(BMA_draws, ci = 0.95) -BMA_hdi +For example, we've seen that the point null has become less credible after observing the data, +but we might also ask which values have **gained** credibility given the observed data? +The resulting range of values is called **the support interval** +as it indicates which values are supported by the data [@wagenmakers2018SI]. +We can do this by once again comparing the prior and posterior distributions and checking where the posterior densities are higher than the prior densities. -plot(BMA_hdi) -``` -```{r echo=FALSE} -BMA_draws <- weighted_posteriors(mod, mod_carb, verbose = FALSE) +In `bayestestR`, this can be achieved with the `si()` function: -BMA_hdi <- hdi(BMA_draws, ci = 0.95) -BMA_hdi +```{r} +my_first_si <- si(mod_H2, BF = 1, verbose = FALSE) -plot(BMA_hdi, data = BMA_draws) +print(my_first_si) ``` -We can see that across both models under consideration, the posterior of the -`carb` effect is almost equally weighted between the alternative model and the -null model - as represented by about half of the posterior mass concentrated at -0 - which makes sense as both models were almost equally supported by the data. -We can also see that across both models, that now **the HDI does contain 0**. -Thus we have resolved the conflict between the Bayes factor and the HDI -[@rouder2018bayesian]! +The argument `BF = 1` indicates that +we want the interval to contain values that have gained support by a factor of at least 1 +(that is, _any_ support at all). -**Note**: Parameters might play different roles across different models. +Note that this is different from a credible interval, which contains values that have high credibility in the posterior distribution, regardless of how much their credibility has changed from the prior distribution: -For example, the parameter `A` plays a different role in the model `Y ~ A + B` -(where it is a *main* effect) than it does in the model `Y ~ A + B + A:B` (where -it is a *simple* effect). In many cases centering of predictors (mean subtracting -for continuous variables, and orthogonal coding for factors) can in some cases reduce this issue. - -# Appendices - -## Testing contrasts (with `emmeans` / `modelbased`) - -Besides testing parameter `bayesfactor_parameters()` can be used to test any -estimate based on the prior and posterior distribution of the estimate. One way -to achieve this is with a mix of `bayesfactor_parameters()` + -[**`emmeans`**](https://cran.r-project.org/package=emmeans) to [test Bayesian -contrasts](https://easystats.github.io/blog/posts/bayestestr_emmeans/). +```{r} +hdi(mod_H2) +``` -For example, in the `sleep` example from above, we can estimate the group means -and the difference between them: +Visually, we can see that the credibility of all the values within this interval +has increased (and likewise the credibility of all the values outside this +interval has decreased): -```{r, echo=FALSE} -set.seed(1) +```{r} +plot(my_first_si) ``` -```{r eval=FALSE} -library(emmeans) +We can also see the this support interval excludes the point null +(0) - whose credibility we've already seen has decreased by the observed data. +This emphasizes the relationship between the support interval and the Bayes +factor: -(group_diff <- emmeans(model, pairwise ~ group, data = sleep)) +> "The interpretation of such intervals would be analogous to how a frequentist +confidence interval contains all the parameter values that would not have been +rejected if tested at level $\alpha$. For instance, a BF = 1/3 support interval +encloses all values of theta for which the updating factor is not stronger than +3 against." [@wagenmakers2018SI] -# pass the original model via prior -bayesfactor_parameters(group_diff, prior = model) -``` -```{r echo=FALSE, message=FALSE, warning=FALSE} -library(emmeans) +Thus, the choice of BF (the level of support the interval should indicate) +depends on what we want our interval to represent: -suppressWarnings((group_diff <- emmeans(model, pairwise ~ group, data = sleep))) +- A $BF = 1$ contains values whose credibility has merely not decreased by +observing the data. +- A $BF > 1$ contains values who received more impressive support from the data. +- A $BF < 1$ contains values whose credibility has *not* been impressively +decreased by observing the data. Testing against values outside this interval +will produce a Bayes factor larger than $1/BF$ in support of the alternative. -# pass the original model via prior -suppressWarnings(bayesfactor_parameters(group_diff, prior = model)) -``` +# Appendix: Specifying correct priors for factors {#contr_bayes} -That is strong evidence for the mean of group 1 being 0, and for group 2 for not -being 0, but hardly any evidence for the difference between them being not 0. -Conflict? Uncertainty? That is the Bayesian way! +When modeling predictors with more than 2 levels (e.g., factors) +there any many options for how to _encode_ the factor into the model (e.g., dummy coding, sum coding, etc.). +Unlike frequentist modeling, where the choice of contrast coding is mostly a matter of interpretability and convenience, +in Bayesian modeling different encodings -- and priors on those encodings -- +can lead to different implied priors. -We can also use the `easystats`' [**`modelbased`**](https://cran.r-project.org/package=modelbased) package to compute Bayes factors for contrasts: +@rouder2017bayesian discuss how one might wish to set some global multidimensional prior (the _g_-prior) +on the factor's levels that is not sensitive to the order of level or the choice of reference group. +These are implamneted in `contr.equalprior()` and its siblings. -```{r, echo=FALSE} -set.seed(1) -``` +Below we demonstrate how the choice of contrast coding can lead to different implied priors regarding the possible _ordering_ and _differences_ between the factor's levels. -```{r, eval=FALSE} -library(modelbased) +Let us fit 3 models with different contrast codings for a factor with 3 levels: -estimate_contrasts(model, test = "bf", bf_prior = model) -``` +```{r} +library(rstanarm) +library(bayestestR) -**NOTE**: See the *Specifying Correct Priors for Factors with More Than 2 Levels* section below. +data("disgust", package = "bayestestR") -## Specifying correct priors for factors {#contr_bayes} +# Use R's default treatment contrasts (first level as reference) +mod_odor.treatment <- stan_glm( + score ~ condition, + family = gaussian(), + data = disgust, -This section introduces the biased priors obtained when using the common *effects* factor coding (`contr.sum`) or dummy factor coding (`contr.treatment`), and the solution of using orthonormal factor coding (`contr.equalprior`) [as outlined in @rouder2012default, section 7.2]. + prior = normal(location = 0, scale = 2), -**Special care should be taken when working with factors with 3 or more levels**. + contrasts = list(condition = "contr.treatment"), -### Contrasts (and marginal means) + chains = 10, + iter = 5000, + warmup = 1000, + refresh = 0, + diagnostic_file = file.path(tempdir(), "df5.csv") +) -The *effects* factor coding commonly used in factorial analysis carries a hidden -bias when it is applies to Bayesian priors. For example, if we want to test all -pairwise differences between 3 levels of the same factor, we would expect all *a priori* differences to have the same distribution, but... +# Use effects contrasts (sum-to-zero) +mod_odor.sum <- stan_glm( + score ~ condition, + family = gaussian(), + data = disgust, -For our example, we will be test all ***prior*** pairwise differences between -the 3 species in the `iris` dataset. + prior = normal(location = 0, scale = 2), -```{r, eval=FALSE} -df <- iris -contrasts(df$Species) <- contr.sum + contrasts = list(condition = "contr.sum"), -fit_sum <- stan_glm(Sepal.Length ~ Species, - data = df, - prior = normal(0, c(1, 1), autoscale = FALSE), - prior_PD = TRUE, # sample priors - family = gaussian(), - chains = 10, iter = 5000, warmup = 1000, - refresh = 0 + chains = 10, + iter = 5000, + warmup = 1000, + refresh = 0, + diagnostic_file = file.path(tempdir(), "df6.csv") ) -``` -```{r, echo=FALSE} -df <- iris -contrasts(df$Species) <- contr.sum - -fit_sum <- stan_glm(Sepal.Length ~ Species, - data = df, - prior = normal(0, c(1, 1), autoscale = FALSE), - prior_PD = TRUE, # sample priors +mod_odor.equalprior <- stan_glm( + score ~ condition, family = gaussian(), - chains = 4, iter = 2000, warmup = 1000, # reduce build time - refresh = 0 -) -``` + data = disgust, -```{r eval=FALSE} -(pairs_sum <- pairs(emmeans(fit_sum, ~Species))) + prior = normal(location = 0, scale = 2), -ggplot(stack(insight::get_parameters(pairs_sum)), aes(x = values, fill = ind)) + - geom_density(linewidth = 1) + - facet_grid(ind ~ .) + - labs(x = "prior difference values") + - theme(legend.position = "none") -``` -```{r echo=FALSE, warning=FALSE, message=FALSE} -(pairs_sum <- suppressWarnings(pairs(emmeans(fit_sum, ~Species, data = iris)))) - -ggplot(stack(insight::get_parameters(pairs_sum)), aes(x = values, fill = ind)) + - geom_density(linewidth = 1) + - facet_grid(ind ~ .) + - labs(x = "prior difference values") + - theme(legend.position = "none") + contrasts = list(condition = "contr.equalprior"), + + chains = 10, + iter = 5000, + warmup = 1000, + refresh = 0, + diagnostic_file = file.path(tempdir(), "df7.csv") +) ``` -Notice that, though the prior estimate for all 3 pairwise contrasts is ~0, the -scale or the HDI is much narrower for the prior of the `setosa - versicolor` -contrast! +Let's use `{marginaleffects}` to obtain estimates from these Bayesian (prior) models +(after we already showed how do do so with `{emmeans}` above). -**What happened???** -This is caused by an inherent bias in the priors introduced by the *effects* coding (it's even worse with the default treatment coding, because the prior for the intercept is usually drastically different from the effect's parameters). **And since it affects the priors, this bias will also bias the Bayes factors over / understating evidence for some contrasts over others!** -The solution is to use *equal-prior* factor coding, a-la the `contr.equalprior*` family, which can either specify this factor coding per-factor: ```{r} -contrasts(df$Species) <- contr.equalprior_pairs -``` +mod_odor.treatment_prior <- unupdate(mod_odor.treatment) +mod_odor.sum_prior <- unupdate(mod_odor.sum) +mod_odor.equalprior_prior <- unupdate(mod_odor.equalprior) -Or you can set it globally: +(pr_treatment_prior <- avg_predictions( + mod_odor.treatment_prior, + variables = "condition" +)) -```{r, eval=FALSE} -options(contrasts = c("contr.equalprior_pairs", "contr.poly")) -``` - -Let's again estimate the ***prior*** differences: - -```{r, eval=FALSE} -fit_bayes <- stan_glm(Sepal.Length ~ Species, - data = df, - prior = normal(0, c(1, 1), autoscale = FALSE), - prior_PD = TRUE, # sample priors - family = gaussian(), - chains = 10, iter = 5000, warmup = 1000, - refresh = 0 -) -``` +(pr_sum_prior <- avg_predictions( + mod_odor.sum_prior, + variables = "condition" +)) -```{r, echo=FALSE} -fit_bayes <- stan_glm(Sepal.Length ~ Species, - data = df, - prior = normal(0, c(1, 1), autoscale = FALSE), - prior_PD = TRUE, # sample priors - family = gaussian(), - chains = 4, iter = 2000, warmup = 1000, # reduce build time - refresh = 0 -) +(pr_equalprior_prior <- avg_predictions( + mod_odor.equalprior_prior, + variables = "condition" +)) ``` +We can see that for all 3 models, the means of all three groups have about the same prior distribution: +$Md=60$, $95 CI [-3, +63]$. -```{r eval=FALSE} -(pairs_bayes <- pairs(emmeans(fit_bayes, ~Species))) +We might expect the same for the differences between the groups, but this is not the case: -ggplot(stack(insight::get_parameters(pairs_bayes)), aes(x = values, fill = ind)) + - geom_density(linewidth = 1) + - facet_grid(ind ~ .) + - labs(x = "prior difference values") + - theme(legend.position = "none") -``` -```{r echo=FALSE, warning=FALSE, message=FALSE} -(pairs_bayes <- suppressWarnings(pairs(emmeans(fit_bayes, ~Species, data = iris)))) - -ggplot(stack(insight::get_parameters(pairs_bayes)), aes(x = values, fill = ind)) + - geom_density(linewidth = 1) + - facet_grid(ind ~ .) + - labs(x = "prior difference values") + - theme(legend.position = "none") +```{r} +avg_comparisons(mod_odor.treatment_prior, variables = list("condition" = "pairwise")) ``` -We can see that using the `contr.equalprior_pairs` coding scheme, we have equal priors on all pairwise contrasts, with the width corresponding to the `normal(0, c(1, 1), autoscale = FALSE)` prior we set! - -There are other solutions to this problem of priors. You can read about them in [Solomon Kurz's blog post](https://solomonkurz.netlify.app/post/2020-12-09-multilevel-models-and-the-index-variable-approach/). - -### Order restrictions +We can see that while the prior differences are all centered on 0, +the prior difference of the `sulfer - lemon` comparison is much wider compared to the comparisons involving the `control` condition. -This bias also affect order restrictions involving 3 or more levels. For -example, if we want to test an order restriction among A, B, and C, the *a -priori* probability of obtaining the order A > C > B is 1/6 (reach back to -*intro to stats* year 1), but... +With effects coding (sum-to-zero), we also get different implied priors for the differences between the groups, +this time the `lemon - control` difference is much narrower than the other two differences involving the `sulfer` condition: -For our example, we will be interested in the following order restrictions in -the `iris` dataset (each line is a separate restriction): +```{r} +avg_comparisons(mod_odor.sum_prior, variables = list("condition" = "pairwise")) +``` +But the `contr.equalprior()` coding gives us the same prior distribution for all differences between the groups: ```{r} -hyp <- c( - # comparing 2 levels - "setosa < versicolor", - "setosa < virginica", - "versicolor < virginica", - - # comparing 3 (or more) levels - "setosa < virginica & virginica < versicolor", - "virginica < setosa & setosa < versicolor", - "setosa < versicolor & versicolor < virginica" -) +avg_comparisons(mod_odor.equalprior_prior, variables = list("condition" = "pairwise")) ``` -With the default factor coding, this looks like this: +Likewise, the implied priors for the ordering of the groups are different across the three models: -```{r, eval=FALSE} -contrasts(df$Species) <- contr.sum +```{r} +pr_treatment <- avg_predictions(mod_odor.treatment, variables = "condition") -fit_sum <- stan_glm(Sepal.Length ~ Species, - data = df, - prior = normal(0, c(1, 1), autoscale = FALSE), - family = gaussian(), - chains = 10, iter = 5000, warmup = 1000 +bayesfactor_restricted( + posterior = pr_treatment, + prior = pr_treatment_prior, + hypothesis = "b2 < b1 & b1 < b3" ) - -em_sum <- emmeans(fit_sum, ~Species) # the posterior marginal means - -bayesfactor_restricted(em_sum, fit_sum, hypothesis = hyp) ``` -```{r, echo=FALSE} -contrasts(df$Species)[, ] <- contr.sum(3) +```{r} +pr_sum <- avg_predictions(mod_odor.sum, variables = "condition") -fit_sum <- stan_glm(Sepal.Length ~ Species, - data = df, - prior = normal(0, c(1, 1), autoscale = FALSE), - family = gaussian(), - chains = 4, iter = 2000, warmup = 1000 # reduce build time +bayesfactor_restricted( + posterior = pr_sum, + prior = pr_sum_prior, + hypothesis = "b2 < b1 & b1 < b3" ) - -em_sum <- suppressWarnings(emmeans(fit_sum, ~Species, data = iris)) - -bayesfactor_restricted(em_sum, fit_sum, hypothesis = hyp) ``` -***What happened???*** - -1. The comparison of 2 levels all have a prior of ~0.5, as expected. -2. The comparison of 3 levels has different priors, depending on the order restriction - i.e. **some orders are *a priori* more likely than others!!!** - -Again, this is solved by using the *equal prior* factor coding (from above). - -```{r, eval=FALSE} -contrasts(df$Species) <- contr.equalprior_pairs +```{r} +pr_equalprior <- avg_predictions(mod_odor.equalprior, variables = "condition") -fit_bayes <- stan_glm(Sepal.Length ~ Species, - data = df, - prior = normal(0, c(1, 1), autoscale = FALSE), - family = gaussian(), - chains = 10, iter = 5000, warmup = 1000 +bayesfactor_restricted( + posterior = pr_equalprior, + prior = pr_equalprior_prior, + hypothesis = "b2 < b1 & b1 < b3" ) -em_bayes <- emmeans(fit_sum, ~Species) # the posterior marginal means -bayesfactor_restricted(em_bayes, fit_sum, hypothesis = hyp) ``` -```{r, echo=FALSE} -contrasts(df$Species)[, ] <- contr.equalprior_pairs(3) -fit_bayes <- stan_glm(Sepal.Length ~ Species, - data = df, - prior = normal(0, c(1, 1), autoscale = FALSE), - family = gaussian(), - chains = 4, iter = 2000, warmup = 1000, # reduce build time - refresh = 0 -) -em_bayes <- suppressWarnings(emmeans(fit_bayes, ~Species, data = iris)) -bayesfactor_restricted(em_bayes, fit_bayes, hypothesis = hyp) -``` +We can see that while all models have the very similiar posterior distributions, +the implied prior orders are different, with only the `contr.equalprior()` coding giving us a prior that does not favor any particular ordering of the groups (and gives 1/6 prior probability to each of the 6 possible orderings of 3 groups). -### Conclusion +While `contr.equalprior()` gives the original formulation given by @rouder2017bayesian, +the `contr.equalprior_pairs()` and `contr.equalprior_deviations()` give slightly more intuitive coding schemes: -When comparing the results from the two factor coding schemes, we find: -1. In both cases, the estimated (posterior) means are quite similar (if not identical). -2. The priors and Bayes factors differ between the two schemes. -3. Only with `contr.equalprior*`, the prior distribution of the difference or the order of 3 (or more) means is balanced. +- `contr.equalprior_pairs()` allows for setting a prior of what a all pairwise differences might be. +- `contr.equalprior_deviations()` allows for setting a prior of what the difference between each group and the grand mean might be. -Read more about the equal prior contrasts in the `contr.equalprior` docs! +***Note:*** all priors set on these contrast codings _must_ be centered on 0 to work! # References diff --git a/vignettes/bayes_factors[WIP].Rmd b/vignettes/bayes_factors[WIP].Rmd deleted file mode 100644 index d1075e10a..000000000 --- a/vignettes/bayes_factors[WIP].Rmd +++ /dev/null @@ -1,760 +0,0 @@ ---- -title: "Bayes Factors" -output: - rmarkdown::html_vignette: - toc: true - toc_depth: 2 - fig_width: 10.08 - fig_height: 6 -tags: [r, bayesian, bayes factors] -vignette: > - \usepackage[utf8]{inputenc} - %\VignetteIndexEntry{Bayes Factors} - %\VignetteEngine{knitr::rmarkdown} -editor_options: - chunk_output_type: console -bibliography: bibliography.bib -csl: apa.csl ---- - -This vignette can be referred to by citing the following: - -- Makowski, D., Ben-Shachar, M. S., \& Lüdecke, D. (2019). *bayestestR: Describing Effects and their Uncertainty, Existence and Significance within the Bayesian Framework*. Journal of Open Source Software, 4(40), 1541. https://doi.org/10.21105/joss.01541 - -- Makowski, D., Ben-Shachar, M. S., Chen, S. H. A., \& Lüdecke, D. (2019). *Indices of Effect Existence and Significance in the Bayesian Framework*. Retrieved from [10.3389/fpsyg.2019.02767](https://doi.org/10.3389/fpsyg.2019.02767) - ---- - -```{r setup, include=FALSE} -library(knitr) - -options(knitr.kable.NA = "", digits = 2) - -knitr::opts_chunk$set( - echo = TRUE, - comment = ">", - out.width = "100%", - message = FALSE, - warning = FALSE, - dpi = 150 -) - -pkgs <- c( - "effectsize", - "bayestestR", - "ggplot2", - "see", - "rstanarm", - "bridgesampling", - "emmeans", - "logspline" -) - -if (!all(sapply(pkgs, require, quietly = TRUE, character.only = TRUE))) { - knitr::opts_chunk$set(eval = FALSE) -} else { - theme_set(theme_modern()) -} -``` - -The adoption of the Bayesian framework, especially in the -social and psychological sciences, seems to be developing in two distinct directions, -with their separation often marked by their opinion of the **Bayes factor**. -In short, one school of thought (e.g., the *Amsterdam school*, led by [E. J. Wagenmakers](https://www.bayesianspectacles.org/)) advocate its use, and emphasize its qualities as a statistical index, -while another point to its limits and prefer, instead, the precise description of posterior distributions (using [CIs](https://easystats.github.io/bayestestR/reference/hdi.html), [ROPEs](https://easystats.github.io/bayestestR/reference/rope.html), etc.). - -The `bayestestR` package does **not** take a side in this debate, -and offers tools to carry out analysis irrespective of the school you subscribe to. -Instead, it strongly supports the notion of an *informed choice*: - -**discover the methods, learn about them, understand them, try them, and decide for yourself**. - -Having said that, here's an introduction to Bayes factors :) - -# The Bayes Factor - -There are many ways to conceptualize Bayes factors (BFs), but fundamentally: -**BFs are indices of *relative* evidence between two *hypotheses* over another**. - -According to Bayes' theorem, -given a the prior probability of some hypothesis $\mathcal{H}$ ($P(\mathcal{H})$) -and some data $\mathcal{D}$, -we can find the posterior probability of the hypothesis ($P(\mathcal{H|D})$) -by accounting for the probability of observing that datum were the hypothesis true ($P(\mathcal{D|H})$, -also known as the *likelihood*):^[normalized by the marginal probability of observing the data, $P(\mathcal{D})$, which we will soon see is often not needed.] - -$$ -P(\mathcal{H|D}) = \frac{P(\mathcal{D|H})\times P(\mathcal{H})}{P(\mathcal{D})} -$$ - -Within this context, an hypothesis is formalized through the specification of an a-priori model: -Priors on the parameters ($\Theta$) that define the data generating process. - -If we have two hypothesis, we can find their posterior probability-odds as such: - -$$ -\underbrace{\frac{P(\mathcal{H}_1|\mathcal{D})}{P(\mathcal{H}_2|\mathcal{D})}}_{\text{Posterior Odds}} = -\underbrace{\frac{P(\mathcal{D}|\mathcal{H}_1)}{P(\mathcal{D}|\mathcal{H}_2)}}_{\text{Likelihood Ratio}} -\times -\underbrace{\frac{P(\mathcal{H}_1)}{P(\mathcal{H}_2)}}_{\text{Prior Odds}} -$$ - -Where the *likelihood ratio* (the middle term) is the ***Bayes factor*** - -it is the ***factor*** by which some **prior odds** have been updated _after_ observing the data to **posterior odds**. -This value can also be thought is _predictive terms_ - how well has a formalized prior model predicted the observed data compared to another formalized prior model? - -Thus, Bayes factors can be calculated in two ways: {#bf-definitions} - -- As a ratio quantifying **the relative probability of the observed data under each of the two hypotheses**: (In some contexts, these probabilities are also called *marginal likelihoods*.) - -$$ -BF_{12}=\frac{P(\mathcal{D}|\mathcal{H}_1)}{P(\mathcal{D}|\mathcal{H}_2)} -$$ - -- As **the degree of shift in prior beliefs** about the relative credibility of -two hypotheses (since they can be computed by dividing posterior odds by prior -odds). - -$$ -BF_{12}=\frac{\text{Posterior Odds}_{12}}{\text{Prior Odds}_{12}} -$$ - - -`{bayestestR}` provides functions for computing Bayes factors in two different contexts: - -- **Comparing statistical models that differ on their priors which represent two competing hypotheses** -- **Testing single estimates (parameters, coefficients, transformed parameters) within a given model** - -# 1. Comparing Models using Bayes Factors {#bayesfactor_models} - -Let's take a look at the _kid IQ_ dataset from the `{rstanarm}` package. - -```{r} -data("kidiq", package = "rstanarm") - -kidiq <- subset(kidiq, select = c(kid_score, mom_hs)) -kidiq <- transform(kidiq, mom_hs = factor(mom_hs, levels = 0:1, labels = c("no", "yes"))) - -head(kidiq) -``` - -We'll be trying to answer a simple question: -what is the mean difference in IQ scores between children whose mothers completed high-school and those whose mothers did not complete high school (as indicated by the `mom_hs` variable). - -There are many hypothesis we might have about this difference. Let's start by examining: - -- $\mathcal{H}_0$: There's no difference in IQ between the two groups. -- $\mathcal{H}_1$: The difference is probably around 20 point in favor of kids whose mothers completed high school. -- $\mathcal{H}_2$: A more conservative hypothesis that the difference, if it exists, is probably no more than about 5 point in either direction. - -Let's plot these: - -```{r, echo=FALSE} -p_prior0 <- ggplot() + - geom_vline(xintercept = 0, linetype = "dashed") + - geom_segment( - aes(x = 0, xend = 0, y = 0, yend = 1), - linewidth = 1, - color = "royalblue" - ) + - geom_point(aes(x = 0, y = 1), size = 3, color = "royalblue") + - theme(axis.text.y = element_blank()) + - expand_limits(y = 1.5, x = c(-50, 50)) + - labs( - x = "Difference", - y = NULL, - title = expression(H[0] ~ ":" ~ theta == 0) - ) - -p_prior1 <- ggplot() + - stat_function( - geom = "area", - fun = dnorm, - args = list(mean = 20, sd = 10), - xlim = c(-50, 50), - fill = "royalblue", - ) + - geom_vline(xintercept = 0, linetype = "dashed") + - theme(axis.text.y = element_blank()) + - expand_limits(y = 0.05) + - labs( - x = "Difference", - y = NULL, - title = expression(H[1] ~ ":" ~ theta %~% norma(20, 10^2)) - ) - -p_prior2 <- ggplot() + - stat_function( - geom = "area", - fun = dnorm, - args = list(mean = 0, sd = 5), - xlim = c(-50, 50), - fill = "royalblue", - ) + - geom_vline(xintercept = 0, linetype = "dashed") + - theme(axis.text.y = element_blank()) + - expand_limits(y = 0.05) + - labs( - x = "Difference", - y = NULL, - title = expression(H[2] ~ ":" ~ theta %~% norma(0, 5^2)) - ) - -plots(p_prior0, p_prior1, p_prior2, n_columns = 1) -``` - -We can build models with these different priors with `{brms}` or `{rstanarm}`:^[We will be using `{rstanarm}` throughout this vignette, but `bayestestR` also supports `{brms}`, `{blavaan}`, `{rstan}`, `{cmdstanr}`, `{BayesFactor}` and more.] - -In any case, note the we will always require _many_ posterior samples for the stability of our BF estimation (typically 10 times more than what we would need for posterior estimation alone; @gronau2020bridgesampling). - -```{r} -library(rstanarm) - -mod_H0 <- stan_glm( - kid_score ~ 1, - family = gaussian(), - data = kidiq, - - chains = 10, - iter = 5000, - warmup = 1000, - refresh = 0, - # required for BF computation - diagnostic_file = file.path(tempdir(), "df0.csv") -) - -mod_H1 <- stan_glm( - kid_score ~ mom_hs, - family = gaussian(), - data = kidiq, - - prior = normal(location = 20, scale = 10), - - chains = 10, - iter = 5000, - warmup = 1000, - refresh = 0, - diagnostic_file = file.path(tempdir(), "df1.csv") -) - -mod_H2 <- stan_glm( - kid_score ~ mom_hs, - family = gaussian(), - data = kidiq, - - prior = normal(location = 0, scale = 5), - - chains = 10, - iter = 5000, - warmup = 1000, - refresh = 0, - diagnostic_file = file.path(tempdir(), "df2.csv") -) -``` - -We can now ask: which a-priori model (each representing a different hypothesis) is more likely to have produced the observed data? - -This is usually done by comparing the marginal likelihoods of two models. In -such a case, the Bayes factor is a measure of the **relative** evidence for one -hypothesis over the other. - -```{r} -bfs <- bayesfactor_models(mod_H1, mod_H2, denominator = mod_H0, verbose = FALSE) - -print(bfs, show_names = TRUE) -``` - - -We can see that both models that allow for a difference between the groups -are much more supported by the data - -with $BF>`r insight::format_value(exp(bfs$log_BF[2]))`$ - -compared to the null (intercept only). - -Note that **interpretation guides** for Bayes factors can be found in the `effectsize` package: - -```{r} -effectsize::interpret_bf(bfs$log_BF[1:2], log = TRUE) -``` - -Due to the transitive property of Bayes factors, -we can easily change the reference model to the model representing $\mathcal{H}_2$: - -```{r update_models1} -bfs2 <- update(bfs, reference = 2, subset = 1) - -print(bfs2, show_names = TRUE) -``` - -The data supports the a-priori model that suggests a positive difference almost 4 times over the model that suggests a small difference. - -We can also get a matrix of Bayes factors of all the pairwise model comparisons: - -```{r} -print(as.matrix(bfs), show_names = TRUE) -``` - -Overall, we can see that both models that allow for some non-0 difference are much more supported by the data compared to the 0-difference model. Let's take a look at the data: - -```{r, echo=FALSE} -ggplot(kidiq, aes(mom_hs, kid_score, fill = mom_hs, color = mom_hs)) + - geom_violindot() + - geom_boxplot(fill = NA, position = position_nudge(-0.2), width = 0.1) + - labs(x = "Mom completed high-school?", y = "Kids' IQ") + - guides(fill = "none", color = "none") -``` - -And indeed both models 1 and 2's posteriors reflect this difference: - -```{r, echo=FALSE} -plots( - plot(hdi(mod_H1)) + - labs(y = NULL, title = "Model 1") + - coord_cartesian(xlim = c(-20, 20)) + - scale_y_discrete(expand = expansion(0.1, 0)) + - guides(fill = "none"), - plot(hdi(mod_H2)) + - coord_cartesian(xlim = c(-20, 20)) + - scale_y_discrete(expand = expansion(0.1, 0)) + - labs(y = NULL, title = "Model 2"), - - n_columns = 1, - guides = "collect" -) -``` - -Note that these posterior distributions are _very_ similar, -but BFs do not compare posterior models - only _a-priori_ models! - -For this reason, computing BFs only makes sense if we are able to formulate our hypotheses -into distinct priors. - -## The BIC approximation - -It is also possible to compute *approximate* Bayes factors for the comparison of *frequentist* models (😱). -This is done by comparing BIC indices, allowing a Bayesian comparison -of nested as well as non-nested frequentist models [@wagenmakers2007practical]. - -Since frequentist modeling does not allow for specification of priors, we are limited to either restricting parameters to 0 or not. - -```{r} -mod_H0f <- lm(kid_score ~ 1, data = kidiq) - -mod_H1f <- lm(kid_score ~ mom_hs, data = kidiq) - -bayesfactor_models(mod_H1f, denominator = mod_H0f) -``` - -(Note how similar this approximate BF is to the proper BFs estimated above.) - - -## Model averaging - -In the previous section, we discussed the direct comparison of two models to -determine if a hypothesis is supported by the data. -However, in many cases there are too many models to consider, -or perhaps it is not straightforward which models we should be comparing to determine if an effect is supported by the data. -For such cases, we can use Bayesian model averaging (BMA) to determine the support -provided by the data for a parameter or model-term across many models. - -### Inclusion Bayes factors {#bayesfactor_inclusion} - -Inclusion Bayes factors answer the question: - -> **Are the observed data more probable under models with a particular predictor, than they are under models without that particular predictor?** - -In other words, on average, are models with predictor $X$ more likely to have -produced the observed data than models without predictor $X$?^[A model without -predictor $X$ can be thought of as a model in which the parameter(s) of the -predictor have been restricted to a null-point of 0.] - -These Bayes factors are computed not as the ratios of marginal likelihoods, -but as **the degree of shift in prior beliefs**: -Since each model has a prior probability, it is possible to sum the prior -probability of all models that include a predictor of interest (the *prior inclusion probability*), and of all models that do not include that predictor -(the *prior exclusion probability*). After the data are observed, and each model -is assigned a posterior probability, we can similarly consider the sums of the -posterior models' probabilities to obtain the *posterior inclusion probability* -and the *posterior exclusion probability*. The change from prior -inclusion odds to the posterior inclusion odds is the **Inclusion Bayes factor** -[$BF_{Inclusion}$; @clyde2011bayesian]. - -```{r} -(bfinc <- bayesfactor_inclusion(bfs)) -``` - -(`bayesfactor_inclusion()` is meant to provide Bayes Factors per predictor, similar to JASP's *Effects* option.) - -We can see that across the 3 models under consideration, models _with_ the `mom_hs` term fit the data `r insight::format_value(exp(bfinc$log_BF))` times more than the model _without_ that term. - -### Averaging posteriors {#weighted_posteriors} - -Similar to how we can average evidence for a predictor across models, we can -also average the **posterior estimate** across models. - -```{r} -ppp <- weighted_posteriors(mod_H0, mod_H1, mod_H2) - -plot(hdi(ppp$mom_hsyes)) + - coord_cartesian(xlim = c(-20, 20)) -``` - -This looks a lot like the posterior obtained from the second model, -which shouldn't be surprising since about 80% of the averaged posterior comes from the second model. - -```{r} -attr(ppp, "weights") -``` - - -## Order restricted models {#bayesfactor_restricted} - -We've already seen we can formalize hypothesis into distributional priors -(e.g., _the difference is probably no more than about 5 point in either direction._ became $theta \sim Normal(0, 5^2)$). -These priors are **unrestricted** - that is, **all values** between $-\infty$ -and $\infty$ of all parameters in the model have some non-zero credibility (no -matter how small; this is true for both the prior and posterior distribution). - -But we can also formalize hypotheses as **order restrictions** [@morey_2015_blog; -@morey2011bayesinterval]. - -For example, we can impose an _additional_ order restriction -that the difference _must be positive_, which we can write like this (if we had to): - -$$ -\mathcal{H}_{2r}: theta \sim Normal(0, 5^2)\begin{bmatrix} \infty \\ 0 \end{bmatrix} -$$ - -By testing the probabilities of these restrictions on prior and posterior samples, -we can see how the probabilities of the restricted distributions _change_ after observing the data - -[this change is a Bayes factor](#bf-definitions). -These can be achieved with `bayesfactor_restricted()`, that compute a Bayes -factor for these restricted model vs the unrestricted model. - -```{r} -bayesfactor_restricted(mod_H2, hypothesis = "mom_hsyes > 0") -``` - - -In other words, the data fits the restricted model (where the difference must be small _and positive_) twice as much as it fits the un-restircted model (where the difference must be small). - -We can compare multiple restricted hypotheses. For example: that the difference isn't just positive, it's larger than 4. - -```{r} -bf_rstr <- bayesfactor_restricted( - mod_H2, - hypothesis = c( - positive = "mom_hsyes > 0", - strong = "mom_hsyes > 4" - ) -) -``` - -Here too we can obtain a matrix of BFs between all models: - -```{r} -print(as.matrix(bf_rstr), show_names = TRUE) -``` - -We can see the "strong" model is preferred over both the un-restricted model and the "positive" model. - -Again, we can use the transitive properties of Bayes factors to find the BF comparing $\mathcal{H}_{2r}$ and $\mathcal{H}_1$: - -$$ -\begin{align} -BF_{2r,1} & = BF_{2,0} \times BF_{2r,2} \\ -& = \frac{P(\mathcal{D}|\mathcal{H}_{2})}{P(\mathcal{D}|\mathcal{H}_0)} -\times \frac{P(\mathcal{D}|\mathcal{H}_{2r})}{P(\mathcal{D}|\mathcal{H}_2)} \\ -& = \frac{P(\mathcal{D}|\mathcal{H}_{2r})}{P(\mathcal{D}|\mathcal{H}_0)} -\end{align} -$$ - -```{r} -BF_2.0 <- as.numeric(bfs)[2] -BF_2r.2 <- as.numeric(bf_rstr)[2] - -(BF_2r.0 <- BF_2.0 * BF_2r.2) -``` - -So the data support the hypothesis that the difference is small but strictly positive -`r insight::format_value(BF_2r.0)` times more than the hypothesis that the difference is exactly 0. - -**Because these restrictions are on the prior distribution, they are only appropriate for testing pre-planned (*a priori*) hypotheses, and should not be used for any post hoc comparisons [@morey_2015_blog].** - ---- - -We are not limited to a single order restrictions - we can compound them to create complex restrictions. - -Let's look at the [`disgust` dataset](http://bayesfactor.blogspot.com/2015/01/multiple-comparisons-with-bayesfactor-2.html), were 150 individuals rated "moral harshness" of undocumented migrants in one of three conditions: no odor, clean odor (lemon), or disgusting (sulfur) odor during questionnaire. - -```{r} -data("disgust", package = "bayestestR") - -str(disgust) -``` - - -Let's build our simple one-way-ANOVA-like model: - -```{r} -mod_odor <- stan_glm( - score ~ condition, - family = gaussian(), - data = disgust, - - prior = normal(location = 0, scale = 2), - - contrasts = list(condition = "contr.equalprior_pairs"), - - chains = 10, - iter = 5000, - warmup = 1000, - refresh = 0, - diagnostic_file = file.path(tempdir(), "df3.csv") -) - -summary(mod_odor) -``` - -**NOTE**: See the *Specifying Correct Priors for Factors with More Than 2 Levels* appendix below for more details on the contrast coding used here. - -Let's obtain the prior and posterior distributions of the condition means using `posterior_epred()`. - -```{r} -mod_odor.prior <- unupdate(mod_odor) # get the priors-only model - -library(emmeans) - -disgust_means <- emmeans(mod_odor, ~condition) -disgust_means.prior <- emmeans(mod_odor.prior, ~condition) -``` - -Our hypothesis is that the moral harshness ratings are lowest in the lemon condition, higher in the control condition, and highest in the sulfur condition - in other words, there is an _order_ of: $\text{lemon} < \text{control} < \text{sulfur}$. - -We can formalize this hypothesis as an order restriction on the means of the three conditions: - -```{r} -bayesfactor_restricted( - posterior = disgust_means, - prior = disgust_means.prior, - hypothesis = "lemon < control & control < sulfur" -) -``` - -We can see that a-priori, this specific ordering of the 3 means has a proability of $\frac{1}{6}$ (1 of 6 possible orderings of 3 values), but after observing the data, this ordering is about ~4 times more likely than any other ordering. - -The transitive properties of Bayes factors can also be used to compute a Bayes factor for **dividing** hypotheses - -that is for two *complementary* opposing one-sided hypotheses [@morey2014simple]. - -For example, above we compared $\mathcal{H}_{+}: \theta > 0$ - *the difference is positive* -to the null $\mathcal{H}_{-}: \theta < 0$: *the difference is negative*: - -$$ -\begin{align} -BF_{+,-} & = BF_{+,0} \times BF_{0,-} \\ -& = \frac{P(\mathcal{D}|\mathcal{H}_{+})}{P(\mathcal{D}|\mathcal{H}_0)} -\times \frac{P(\mathcal{D}|\mathcal{H}_{0})}{P(\mathcal{D}|\mathcal{H}_-)} \\ -& = \frac{P(\mathcal{D}|\mathcal{H}_{+})}{P(\mathcal{D}|\mathcal{H}_{-})} -\end{align} -$$ - - -```{R} -bf_div <- bayesfactor_restricted( - posterior = disgust_means, - prior = disgust_means.prior, - hypothesis = c( - positive = "lemon - sulfur > 0", - negative = "lemon - sulfur < 0" - ) -) - -print(as.matrix(bf_div), show_names = TRUE) -``` - - -The hypothesis that the lemon condition yields lower ratings than the sulfur condition -is about 60 times more supported by the data than -the hypothesis that the lemon condition has higher ratings than the sulfur condition. - -Etc... etc... we can compound as many restrictions as we want, and compare them to each other, or to the unrestricted model, or to the null model, etc. - ---- - -Overall, Bayes factors are a powerful tool for comparing the relative evidence of two _formalized_ hypotheses (i.e., hypotheses that have been formalized into distinct priors). - -Note that Bayes factors are _not_ a tool for comparing ***posterior*** models -(for such comparisons, see the [`{loo}` package](https://mc-stan.org/loo/)) - -and in fact two similar posterior models can have very different BFs if their priors are different. - - -# 2. Testing Models' Parameters with Bayes Factors {#bayesfactor_parameters} - -For testing a point null hypothesis (e.g., $\mathcal{H}_0: \theta = 0$) against some alternative non-null hypothesis (e.g., $\mathcal{H}_1: \theta \sim Normal(0, 5^2)$), a nice "short cut" can be used to obtain a Bayes factor - via the Savage-Dickey density ratio [@wagenmakers2010bayesian]. - -If we zoomed-in on the null value $\theta_0$ - what does it mean for the null's credability to have become _lower_ in the posterior distribution? -Well, since the null is less credible, that necessarily means that the alternative is _more_ credible by the same amount! - -Note that for a point null on a continuous parameter, the _probability_ of the null is always 0, and the probability of all other values is 1. However, the _density_ of the null can be non-zero, and it is this density that quantifies the credibility of the null hypothesis. This means that the prior _odds_ of the null vs the alternative are: - -$$ -\text{Prior Odds} = \frac{P(\theta \neq \theta_0)}{P(\theta=\theta_0)} = \frac{1}{P(\theta=\theta_0)} -$$ - -Likewise, the posterior odds of the null vs the alternative are: - -$$ -\text{Posterior Odds} = \frac{P(\theta \neq \theta_0 \mid \mathcal{D})}{P(\theta=\theta_0 \mid \mathcal{D})} - = \frac{1}{P(\theta=\theta_0 \mid \mathcal{D})} \\ -$$ - -Recall that a Bayes factor can be thought of as the degree of shift in the relative credibility of two hypotheses from the prior model to the posterior model: - -$$ -\begin{align} -BF_{10} & = \frac{\text{Posterior Odds}_{10}}{\text{Prior Odds}_{10}} -= \frac{\frac{1}{P(\theta=\theta_0 \mid \mathcal{D})}}{\frac{1}{P(\theta=\theta_0)}} = \\ -& = \frac{P(\theta=\theta_0)}{P(\theta=\theta_0 \mid \mathcal{D})} -\end{align} -$$ - -In other words, it is sufficient to compare the density of the null under the prior distribution ($P(\theta=\theta_0)$) with the density of the null under posterior distribution ($P(\theta=\theta_0 \mid \mathcal{D})$) to obtain a Bayes factor comparing the null and alternative hypotheses - the degree to which the null has become more or less credible after observing the data. - -This can be done using the `bayesfactor_parameters()` - let's use it to test the null hypothesis that the difference in IQ between the two groups is exactly 0: - -```{R} -(sddr <- bayesfactor_parameters(mod_H2, null = 0)) -``` - -Looking at the Savage-Dickey density ratio for the `mom_hsyes` parameter, -we can see that the null has become substantially less credible after observing the data - -and therefore the alternative has become _more_ credible. - -```{R} -plot(sddr) -``` - -We can see that the center of the posterior distribution has shifted away from 0 (to around 10), and the density at 0 has become much smaller in the posterior distribution compared to the prior distribution suggesting that the data is less compatible with the null value of 0 that with other values overall. - -Compare the Savage-Dickey density ratio for the `mom_hsyes` parameter -with the Bayes factor comparing `mod_H2` (the alternative) and `mod_H0` (the null): - -```{R} -print(update(bfs, subset = 2), show_names = TRUE) -``` - -Not perfect, but a good approximation. - -### Testing against a null-*region* - -One way of operationalizing the null-hypothesis is by setting a null _region_, such -that an effect that falls within this interval would be *practically* equivalent -to the null [@kruschke2010believe]. In our case, that means defining a range of -effects we would consider equal to no difference in IQ between the two groups. -Let's say we consider any difference between -5 and 5 points to be practically equivalent to no difference at all, -we would define our null-region as $\mathcal{H}_0: \theta \in [-5, 5]$. - -The Bayes factor for this null-region can be obtained -by comparing the change in the _relative_ credibility of the null-region $\mathcal{H}_0: \theta \in [-5, 5]$ -and the non-null region $\mathcal{H}_1: \theta \notin [-5, 5]$ from the prior to the posterior distribution - -to achieve this, we combine the logic of the Savage-Dickey density ratio -with the logic of the order-restricted Bayes factor! - -This too can be done with `bayesfactor_parameters()`, by specifying a null-region instead of a point null: - -```{R} -(sddr_region <- bayesfactor_parameters(mod_H2, null = c(-5, 5))) - -plot(sddr_region) -``` - -We can see that the null-region has become much less credible by a factor of >100 after observing the data - -suggesting that data is more compatible with non-null values than with null values, -and therefore the alternative (that the difference is outside of the [-5, 5] range) has become relatively much more credible. - -### Directional hypotheses - -We can also compute Bayes factors for directional hypotheses ("one sided"), -if we have a prior hypotheses about the direction of the effect. This is similiar to the _dividing_ Bayes factor discussed above, but we are still comparing the (directional) alternative to the null (not between two directional hypotheses). -This too can be done by setting an *order restriction* on the prior and posterior distributions [@morey2014simple]. -For example, if we have a prior hypothesis that *the difference in IQ between the two groups is positive*, -the alternative will be restricted to the region to the right of the null (point or interval): - -```{R} -(sddr_directional <- bayesfactor_parameters(mod_H2, null = c(-5, 5), direction = "right")) - -plot(sddr_directional) -``` - -As we can see, given that we have an *a priori* assumption about the direction of the difference, the evidence against the null is even stronger. Again, given this order restriction on the alternative hypothesis, the posterior mass has substantially shifted away and outside the null value, giving some extreme evidence against the null and in favor of the alternative. - -### Support intervals and curves {#si} - -So far we've seen that Bayes factors quantify relative support between competing hypotheses. However, we can also ask: - -> **Upon observing the data, the credibility of which of the parameter’s values has increased (or decreased)?** - -For example, we've seen that the point null has become less credible after observing the data, -but we might also ask which values have **gained** credibility given the observed data? -The resulting range of values is called **the support interval** -as it indicates which values are supported by the data [@wagenmakers2018SI]. -We can do this by once again comparing the prior and posterior distributions and checking where the posterior densities are higher than the prior densities. - -In `bayestestR`, this can be achieved with the `si()` function: - -```{r} -my_first_si <- si(mod_H2, BF = 1, verbose = FALSE) - -print(my_first_si) -``` - -The argument `BF = 1` indicates that -we want the interval to contain values that have gained support by a factor of at least 1 -(that is, _any_ support at all). - -Note that this is different from a credible interval, which contains values that have high credibility in the posterior distribution, regardless of how much their credibility has changed from the prior distribution: - -```{r} -hdi(mod_H2) -``` - -Visually, we can see that the credibility of all the values within this interval -has increased (and likewise the credibility of all the values outside this -interval has decreased): - -```{r} -plot(my_first_si) -``` - -We can also see the this support interval excludes the point null -(0) - whose credibility we've already seen has decreased by the observed data. -This emphasizes the relationship between the support interval and the Bayes -factor: - -> "The interpretation of such intervals would be analogous to how a frequentist -confidence interval contains all the parameter values that would not have been -rejected if tested at level $\alpha$. For instance, a BF = 1/3 support interval -encloses all values of theta for which the updating factor is not stronger than -3 against." [@wagenmakers2018SI] - -Thus, the choice of BF (the level of support the interval should indicate) -depends on what we want our interval to represent: - -- A $BF = 1$ contains values whose credibility has merely not decreased by -observing the data. -- A $BF > 1$ contains values who received more impressive support from the data. -- A $BF < 1$ contains values whose credibility has *not* been impressively -decreased by observing the data. Testing against values outside this interval -will produce a Bayes factor larger than $1/BF$ in support of the alternative. - -# Appendices - -```{r} - -``` - -## Specifying correct priors for factors {#contr_bayes} - -## Contrasts (and marginal means) - - -# References diff --git a/vignettes/bibliography.bib b/vignettes/bibliography.bib index 24b1352a4..f93936a53 100644 --- a/vignettes/bibliography.bib +++ b/vignettes/bibliography.bib @@ -478,4 +478,15 @@ @article{gronau2020bridgesampling volume={92}, pages={1--29}, year={2020} -} \ No newline at end of file +} + +@article{rouder2017bayesian, + title={Bayesian analysis of factorial designs.}, + author={Rouder, Jeffrey N and Morey, Richard D and Verhagen, Josine and Swagman, April R and Wagenmakers, Eric-Jan}, + journal={Psychological Methods}, + volume={22}, + number={2}, + pages={304}, + year={2017}, + publisher={American Psychological Association} +} From d3219469b869f7d2b8a83b69fccd58d532a6fca8 Mon Sep 17 00:00:00 2001 From: "Mattan S. Ben-Shachar" Date: Mon, 11 May 2026 20:10:35 +0300 Subject: [PATCH 35/36] clean up diagnostic_posterior.CmdStanFit --- R/diagnostic_posterior.R | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/R/diagnostic_posterior.R b/R/diagnostic_posterior.R index f2e75337e..a1b5d099d 100644 --- a/R/diagnostic_posterior.R +++ b/R/diagnostic_posterior.R @@ -493,7 +493,12 @@ diagnostic_posterior.stanfit <- function( #' @export -diagnostic_posterior.CmdStanFit <- function(posterior, diagnostic = "all", ...) { +diagnostic_posterior.CmdStanFit <- function( + posterior, + diagnostic = "all", + parameters = NULL, + ... +) { if ("all" %in% diagnostic) { diagnostic <- c("ESS", "Rhat", "MCSE") } @@ -501,7 +506,12 @@ diagnostic_posterior.CmdStanFit <- function(posterior, diagnostic = "all", ...) insight::check_if_installed("posterior") insight::check_if_installed("cmdstanr") - draws <- posterior$draws(format = "draws_df") + pars <- insight::find_parameters(posterior, flatten = TRUE) + if (!is.null(parameters)) { + pars <- pars[!grepl(parameters, pars)] + } + + draws <- posterior$draws(format = "draws_df", variables = pars) out <- posterior::summarize_draws( draws, @@ -518,7 +528,7 @@ diagnostic_posterior.CmdStanFit <- function(posterior, diagnostic = "all", ...) ) ) - out[!grepl("^lp_", out$Parameter), c("Parameter", diagnostic), drop = FALSE] + out[, c("Parameter", diagnostic), drop = FALSE] } From 4d51f6a8c8f7911c4350bb99168ed7fac84e592c Mon Sep 17 00:00:00 2001 From: "Mattan S. Ben-Shachar" Date: Mon, 11 May 2026 20:16:04 +0300 Subject: [PATCH 36/36] Update NEWS.md --- NEWS.md | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/NEWS.md b/NEWS.md index 9a24ef1ca..aff5e5c8c 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,10 +1,13 @@ -# bayestestR 0.17.0.xxx +# bayestestR 0.17.0.4 ## New functionality -* `as.matrix()` for `bayesfactor_restricted()`, to obtain a matrix of Bayes - factors between all restricted models. -* New dedicated docs for Bayes factor methods `?bayesfactor_methods` +* Improved Bayes factor methods: + + * New docs at `?bayesfactor_methods` + + * `as.matrix()` for `bayesfactor_restricted()`, to obtain a matrix of Bayes factors between all restricted models. + * Added support for `CmdStanFit` models from `{cmdstanr}` and expanded support for `stanfit` models from `rstan`.