diff --git a/DESCRIPTION b/DESCRIPTION index d94a174ca..cee96ac6b 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Type: Package Package: performance Title: Assessment of Regression Models Performance -Version: 0.11.0.8 +Version: 0.11.0.9 Authors@R: c(person(given = "Daniel", family = "Lüdecke", @@ -154,3 +154,4 @@ Config/Needs/website: r-lib/pkgdown, easystats/easystatstemplate Config/rcmdcheck/ignore-inconsequential-notes: true +Remotes: easystats/insight diff --git a/NEWS.md b/NEWS.md index 3fc0d30df..7b5fefd5b 100644 --- a/NEWS.md +++ b/NEWS.md @@ -5,6 +5,10 @@ * Aliases `posterior_predictive_check()` and `check_posterior_predictions()` for `check_predictions()` are deprecated. +* Arguments named `group` or `group_by` will be deprecated in a future release. + Please use `by` instead. This affects `check_heterogeneity_bias()` in + *performance*. + ## General * Improved documentation and new vignettes added. diff --git a/R/check_heterogeneity_bias.R b/R/check_heterogeneity_bias.R index d9bb337f9..460dda761 100644 --- a/R/check_heterogeneity_bias.R +++ b/R/check_heterogeneity_bias.R @@ -8,9 +8,10 @@ #' @param select Character vector (or formula) with names of variables to select #' that should be checked. If `x` is a mixed model object, this argument #' will be ignored. -#' @param group Character vector (or formula) with the name of the variable that +#' @param by Character vector (or formula) with the name of the variable that #' indicates the group- or cluster-ID. If `x` is a model object, this #' argument will be ignored. +#' @param group Deprecated. Use `by` instead. #' #' @seealso #' For further details, read the vignette @@ -25,13 +26,18 @@ #' @examples #' data(iris) #' iris$ID <- sample(1:4, nrow(iris), replace = TRUE) # fake-ID -#' check_heterogeneity_bias(iris, select = c("Sepal.Length", "Petal.Length"), group = "ID") +#' check_heterogeneity_bias(iris, select = c("Sepal.Length", "Petal.Length"), by = "ID") #' @export -check_heterogeneity_bias <- function(x, select = NULL, group = NULL) { +check_heterogeneity_bias <- function(x, select = NULL, by = NULL, group = NULL) { + ## TODO: deprecate later + if (!is.null(group)) { + insight::format_warning("Argument `group` is deprecated and will be removed in a future release. Please use `by` instead.") # nolint + by <- group + } if (insight::is_model(x)) { - group <- insight::find_random(x, split_nested = TRUE, flatten = TRUE) - if (is.null(group)) { - insight::format_error("Model is no mixed model. Please provide a mixed model, or a data frame and arguments `select` and `group`.") # nolint + by <- insight::find_random(x, split_nested = TRUE, flatten = TRUE) + if (is.null(by)) { + insight::format_error("Model is no mixed model. Please provide a mixed model, or a data frame and arguments `select` and `by`.") # nolint } my_data <- insight::get_data(x, source = "mf", verbose = FALSE) select <- insight::find_predictors(x, effects = "fixed", component = "conditional", flatten = TRUE) @@ -39,17 +45,19 @@ check_heterogeneity_bias <- function(x, select = NULL, group = NULL) { if (inherits(select, "formula")) { select <- all.vars(select) } - if (inherits(group, "formula")) { - group <- all.vars(group) + if (inherits(by, "formula")) { + by <- all.vars(by) } my_data <- x } - unique_groups <- .n_unique(my_data[[group]]) - combinations <- expand.grid(select, group) + unique_groups <- .n_unique(my_data[[by]]) + combinations <- expand.grid(select, by) result <- Map(function(predictor, id) { # demean predictor + + ## FIXME: update argument name later! d <- datawizard::demean(my_data, select = predictor, group = id, verbose = FALSE) # get new names diff --git a/R/performance_cv.R b/R/performance_cv.R index c5e08be95..f4bfa87ef 100644 --- a/R/performance_cv.R +++ b/R/performance_cv.R @@ -69,7 +69,7 @@ performance_cv <- function(model, test_pred <- insight::get_predicted(model, ci = NULL, data = data) test_resd <- test_resp - test_pred } else if (method == "holdout") { - train_i <- sample(seq_len(nrow(model_data)), size = round((1 - prop) * nrow(model_data)), replace = FALSE) + train_i <- sample.int(nrow(model_data), size = round((1 - prop) * nrow(model_data)), replace = FALSE) model_upd <- stats::update(model, data = model_data[train_i, ]) test_resp <- model_data[-train_i, resp.name] test_pred <- insight::get_predicted(model_upd, ci = NULL, data = model_data[-train_i, ]) diff --git a/man/check_heterogeneity_bias.Rd b/man/check_heterogeneity_bias.Rd index 20c0bba4c..21534540e 100644 --- a/man/check_heterogeneity_bias.Rd +++ b/man/check_heterogeneity_bias.Rd @@ -4,7 +4,7 @@ \alias{check_heterogeneity_bias} \title{Check model predictor for heterogeneity bias} \usage{ -check_heterogeneity_bias(x, select = NULL, group = NULL) +check_heterogeneity_bias(x, select = NULL, by = NULL, group = NULL) } \arguments{ \item{x}{A data frame or a mixed model object.} @@ -13,9 +13,11 @@ check_heterogeneity_bias(x, select = NULL, group = NULL) that should be checked. If \code{x} is a mixed model object, this argument will be ignored.} -\item{group}{Character vector (or formula) with the name of the variable that +\item{by}{Character vector (or formula) with the name of the variable that indicates the group- or cluster-ID. If \code{x} is a model object, this argument will be ignored.} + +\item{group}{Deprecated. Use \code{by} instead.} } \description{ \code{check_heterogeneity_bias()} checks if model predictors or variables may @@ -25,7 +27,7 @@ between-effect (\emph{Bell and Jones, 2015}). \examples{ data(iris) iris$ID <- sample(1:4, nrow(iris), replace = TRUE) # fake-ID -check_heterogeneity_bias(iris, select = c("Sepal.Length", "Petal.Length"), group = "ID") +check_heterogeneity_bias(iris, select = c("Sepal.Length", "Petal.Length"), by = "ID") } \references{ \itemize{ diff --git a/tests/testthat/test-check_heterogeneity_bias.R b/tests/testthat/test-check_heterogeneity_bias.R index 2bd63856e..7042f3064 100644 --- a/tests/testthat/test-check_heterogeneity_bias.R +++ b/tests/testthat/test-check_heterogeneity_bias.R @@ -2,29 +2,29 @@ test_that("check_heterogeneity_bias", { data(iris) set.seed(123) iris$ID <- sample.int(4, nrow(iris), replace = TRUE) # fake-ID - out <- check_heterogeneity_bias(iris, select = c("Sepal.Length", "Petal.Length"), group = "ID") + out <- check_heterogeneity_bias(iris, select = c("Sepal.Length", "Petal.Length"), by = "ID") expect_equal(out, c("Sepal.Length", "Petal.Length"), ignore_attr = TRUE) expect_output(print(out), "Possible heterogeneity bias due to following predictors: Sepal\\.Length, Petal\\.Length") - out <- check_heterogeneity_bias(iris, select = ~ Sepal.Length + Petal.Length, group = ~ID) + out <- check_heterogeneity_bias(iris, select = ~ Sepal.Length + Petal.Length, by = ~ID) expect_equal(out, c("Sepal.Length", "Petal.Length"), ignore_attr = TRUE) expect_output(print(out), "Possible heterogeneity bias due to following predictors: Sepal\\.Length, Petal\\.Length") m <- lm(Sepal.Length ~ Petal.Length + Petal.Width + Species + ID, data = iris) expect_error( - check_heterogeneity_bias(m, select = c("Sepal.Length", "Petal.Length"), group = "ID"), + check_heterogeneity_bias(m, select = c("Sepal.Length", "Petal.Length"), by = "ID"), regex = "no mixed model" ) skip_if_not_installed("lme4") m <- lme4::lmer(Sepal.Length ~ Petal.Length + Petal.Width + Species + (1 | ID), data = iris) - out <- check_heterogeneity_bias(m, select = c("Sepal.Length", "Petal.Length"), group = "ID") + out <- check_heterogeneity_bias(m, select = c("Sepal.Length", "Petal.Length"), by = "ID") expect_equal(out, c("Petal.Length", "Petal.Width", "Species"), ignore_attr = TRUE) expect_output( print(out), "Possible heterogeneity bias due to following predictors: Petal\\.Length, Petal\\.Width, Species" ) - out <- check_heterogeneity_bias(m, select = ~ Sepal.Length + Petal.Length, group = ~ID) + out <- check_heterogeneity_bias(m, select = ~ Sepal.Length + Petal.Length, by = ~ID) expect_equal(out, c("Petal.Length", "Petal.Width", "Species"), ignore_attr = TRUE) expect_output( print(out),