Skip to content

Commit 3b72ffa

Browse files
authored
Merge branch 'main' into strengejacke/issue697
2 parents 8137ac3 + 01eff88 commit 3b72ffa

8 files changed

+161
-80
lines changed

R/check_model.R

+103-36
Original file line numberDiff line numberDiff line change
@@ -218,9 +218,9 @@ check_model.default <- function(x,
218218
if (minfo$is_bayesian) {
219219
suppressWarnings(.check_assumptions_stan(x, ...))
220220
} else if (minfo$is_linear) {
221-
suppressWarnings(.check_assumptions_linear(x, minfo, residual_type, verbose, ...))
221+
suppressWarnings(.check_assumptions_linear(x, minfo, check, residual_type, verbose, ...))
222222
} else {
223-
suppressWarnings(.check_assumptions_glm(x, minfo, residual_type, verbose, ...))
223+
suppressWarnings(.check_assumptions_glm(x, minfo, check, residual_type, verbose, ...))
224224
},
225225
error = function(e) {
226226
e
@@ -237,6 +237,15 @@ check_model.default <- function(x,
237237
)
238238
}
239239

240+
# did Q-Q plot work with simulated residuals?
241+
if (verbose && is.null(assumptions_data$QQ) && residual_type == "simulated") {
242+
insight::format_warning(paste0(
243+
"Cannot simulate residuals for models of class `",
244+
class(x)[1],
245+
"`. Please try `check_model(..., residual_type = \"normal\")` instead."
246+
))
247+
}
248+
240249
# try to find sensible default for "type" argument
241250
suggest_dots <- (minfo$is_bernoulli || minfo$is_count || minfo$is_ordinal || minfo$is_categorical || minfo$is_multinomial) # nolint
242251
if (missing(type) && suggest_dots) {
@@ -412,26 +421,57 @@ check_model.DHARMa <- check_model.performance_simres
412421

413422
# compile plots for checks of linear models ------------------------
414423

415-
.check_assumptions_linear <- function(model, model_info, residual_type = "normal", verbose = TRUE, ...) {
424+
.check_assumptions_linear <- function(model, model_info, check = "all", residual_type = "normal", verbose = TRUE, ...) {
416425
dat <- list()
417426

418-
dat$VIF <- .diag_vif(model, verbose = verbose)
419-
dat$QQ <- switch(residual_type,
420-
simulated = simulate_residuals(model, ...),
421-
.diag_qq(model, model_info = model_info, verbose = verbose)
422-
)
423-
dat$REQQ <- .diag_reqq(model, level = 0.95, model_info = model_info, verbose = verbose)
424-
dat$NORM <- .diag_norm(model, verbose = verbose)
425-
dat$NCV <- .diag_ncv(model, verbose = verbose)
426-
dat$HOMOGENEITY <- .diag_homogeneity(model, verbose = verbose)
427-
dat$OUTLIERS <- .safe(check_outliers(model, method = "cook"))
428-
if (is.null(dat$OUTLIERS)) {
429-
threshold <- NULL
430-
} else {
431-
threshold <- attributes(dat$OUTLIERS)$threshold$cook
427+
# multicollinearity --------------
428+
if (any(c("all", "vif") %in% check)) {
429+
dat$VIF <- .diag_vif(model, verbose = verbose)
430+
}
431+
432+
# Q-Q plot (normality/uniformity of residuals) --------------
433+
if (any(c("all", "qq") %in% check)) {
434+
dat$QQ <- switch(residual_type,
435+
simulated = .safe(simulate_residuals(model, ...)),
436+
.diag_qq(model, model_info = model_info, verbose = verbose)
437+
)
438+
}
439+
440+
# Random Effects Q-Q plot (normality of BLUPs) --------------
441+
if (any(c("all", "reqq") %in% check)) {
442+
dat$REQQ <- .diag_reqq(model, level = 0.95, model_info = model_info, verbose = verbose)
443+
}
444+
445+
# normal-curve plot (normality of residuals) --------------
446+
if (any(c("all", "normality") %in% check)) {
447+
dat$NORM <- .diag_norm(model, verbose = verbose)
448+
}
449+
450+
# non-constant variance (heteroskedasticity, liniearity) --------------
451+
if (any(c("all", "ncv", "linearity") %in% check)) {
452+
dat$NCV <- .diag_ncv(model, verbose = verbose)
453+
}
454+
455+
# homogeneity of variance --------------
456+
if (any(c("all", "homogeneity") %in% check)) {
457+
dat$HOMOGENEITY <- .diag_homogeneity(model, verbose = verbose)
458+
}
459+
460+
# outliers --------------
461+
if (any(c("all", "outliers") %in% check)) {
462+
dat$OUTLIERS <- .safe(check_outliers(model, method = "cook"))
463+
if (is.null(dat$OUTLIERS)) {
464+
threshold <- NULL
465+
} else {
466+
threshold <- attributes(dat$OUTLIERS)$threshold$cook
467+
}
468+
dat$INFLUENTIAL <- .influential_obs(model, threshold = threshold)
469+
}
470+
471+
# posterior predictive checks --------------
472+
if (any(c("all", "pp_check") %in% check)) {
473+
dat$PP_CHECK <- .safe(check_predictions(model, ...))
432474
}
433-
dat$INFLUENTIAL <- .influential_obs(model, threshold = threshold)
434-
dat$PP_CHECK <- .safe(check_predictions(model, ...))
435475

436476
dat <- insight::compact_list(dat)
437477
class(dat) <- c("check_model", "see_check_model")
@@ -442,28 +482,55 @@ check_model.DHARMa <- check_model.performance_simres
442482

443483
# compile plots for checks of generalized linear models ------------------------
444484

445-
.check_assumptions_glm <- function(model, model_info, residual_type = "simulated", verbose = TRUE, ...) {
485+
.check_assumptions_glm <- function(model, model_info, check = "all", residual_type = "simulated", verbose = TRUE, ...) {
446486
dat <- list()
447487

448-
dat$VIF <- .diag_vif(model, verbose = verbose)
449-
dat$QQ <- switch(residual_type,
450-
simulated = simulate_residuals(model, ...),
451-
.diag_qq(model, model_info = model_info, verbose = verbose)
452-
)
453-
dat$HOMOGENEITY <- .diag_homogeneity(model, verbose = verbose)
454-
dat$REQQ <- .diag_reqq(model, level = 0.95, model_info = model_info, verbose = verbose)
455-
dat$OUTLIERS <- .safe(check_outliers(model, method = "cook"))
456-
if (is.null(dat$OUTLIERS)) {
457-
threshold <- NULL
458-
} else {
459-
threshold <- attributes(dat$OUTLIERS)$threshold$cook
488+
# multicollinearity --------------
489+
if (any(c("all", "vif") %in% check)) {
490+
dat$VIF <- .diag_vif(model, verbose = verbose)
491+
}
492+
493+
# Q-Q plot (normality/uniformity of residuals) --------------
494+
if (any(c("all", "qq") %in% check)) {
495+
dat$QQ <- switch(residual_type,
496+
simulated = .safe(simulate_residuals(model, ...)),
497+
.diag_qq(model, model_info = model_info, verbose = verbose)
498+
)
499+
}
500+
501+
# homogeneity of variance --------------
502+
if (any(c("all", "homogeneity") %in% check)) {
503+
dat$HOMOGENEITY <- .diag_homogeneity(model, verbose = verbose)
504+
}
505+
506+
# Random Effects Q-Q plot (normality of BLUPs) --------------
507+
if (any(c("all", "reqq") %in% check)) {
508+
dat$REQQ <- .diag_reqq(model, level = 0.95, model_info = model_info, verbose = verbose)
509+
}
510+
511+
# outliers --------------
512+
if (any(c("all", "outliers") %in% check)) {
513+
dat$OUTLIERS <- .safe(check_outliers(model, method = "cook"))
514+
if (is.null(dat$OUTLIERS)) {
515+
threshold <- NULL
516+
} else {
517+
threshold <- attributes(dat$OUTLIERS)$threshold$cook
518+
}
519+
dat$INFLUENTIAL <- .influential_obs(model, threshold = threshold)
460520
}
461-
dat$INFLUENTIAL <- .influential_obs(model, threshold = threshold)
462-
dat$PP_CHECK <- .safe(check_predictions(model, ...))
463-
if (isTRUE(model_info$is_binomial)) {
521+
522+
# posterior predictive checks --------------
523+
if (any(c("all", "pp_check") %in% check)) {
524+
dat$PP_CHECK <- .safe(check_predictions(model, ...))
525+
}
526+
527+
# binned residuals for bernoulli/binomial --------------
528+
if (isTRUE(model_info$is_binomial) && any(c("all", "binned_residuals") %in% check)) {
464529
dat$BINNED_RESID <- .safe(binned_residuals(model, verbose = verbose, ...))
465530
}
466-
if (isTRUE(model_info$is_count)) {
531+
532+
# misspecified dispersion and zero-inflation --------------
533+
if (isTRUE(model_info$is_count) && any(c("all", "overdispersion") %in% check)) {
467534
dat$OVERDISPERSION <- .diag_overdispersion(model)
468535
}
469536

R/check_zeroinflation.R

+14-13
Original file line numberDiff line numberDiff line change
@@ -30,18 +30,19 @@
3030
#'
3131
#' @section Tests based on simulated residuals:
3232
#' For certain models, resp. model from certain families, tests are based on
33-
#' [`simulated_residuals()`]. These are usually more accurate for tests than the
34-
#' traditionally used Pearson residuals. However, when simulating from more
35-
#' complex model, such as mixed models or models with zero-inflation, there are
36-
#' several important considerations. Arguments specified in `...` are passed to
37-
#' [`simulate_residuals()`], which relies on [`DHARMa::simulateResiduals()`] (and
38-
#' therefore, arguments in `...` are passed further down to _DHARMa_). The
39-
#' defaults in DHARMa are set on the most conservative option that works for
40-
#' all models. However, in many cases, the help advises to use different settings
41-
#' in particular situations or for particular models. It is recommended to read
42-
#' the 'Details' in `?DHARMa::simulateResiduals` closely to understand the
43-
#' implications of the simulation process and which arguments should be modified
44-
#' to get the most accurate results.
33+
#' simulated residuals (see [`simulated_residual()`]). These are usually more
34+
#' accurate for testing such models than the traditionally used Pearson residuals.
35+
#' However, when simulating from more complex models, such as mixed models or
36+
#' models with zero-inflation, there are several important considerations.
37+
#' Arguments specified in `...` are passed to [`simulate_residuals()`], which
38+
#' relies on [`DHARMa::simulateResiduals()`] (and therefore, arguments in `...`
39+
#' are passed further down to _DHARMa_). The defaults in DHARMa are set on the
40+
#' most conservative option that works for all models. However, in many cases,
41+
#' the help advises to use different settings in particular situations or for
42+
#' particular models. It is recommended to read the 'Details' in
43+
#' `?DHARMa::simulateResiduals` closely to understand the implications of the
44+
#' simulation process and which arguments should be modified to get the most
45+
#' accurate results.
4546
#'
4647
#' @family functions to check model assumptions and and assess model quality
4748
#'
@@ -87,7 +88,7 @@ check_zeroinflation.default <- function(x, tolerance = 0.05, ...) {
8788
not_supported <- c("fixest", "glmx")
8889

8990
# for models with zero-inflation component or negative binomial families,
90-
# we use simulated_residuals()
91+
# we use simulate_residuals()
9192
if (!inherits(x, not_supported) && (model_info$is_zero_inflated || model_info$is_negbin || model_info$family == "genpois")) { # nolint
9293
if (missing(tolerance)) {
9394
tolerance <- 0.1

R/simulate_residuals.R

+3-3
Original file line numberDiff line numberDiff line change
@@ -24,9 +24,9 @@
2424
#' @section Tests based on simulated residuals:
2525
#' For certain models, resp. model from certain families, tests like
2626
#' [`check_zeroinflation()`] or [`check_overdispersion()`] are based on
27-
#' `simulated_residuals()`. These are usually more accurate for such tests than
27+
#' simulated residuals. These are usually more accurate for such tests than
2828
#' the traditionally used Pearson residuals. However, when simulating from more
29-
#' complex model, such as mixed models or models with zero-inflation, there are
29+
#' complex models, such as mixed models or models with zero-inflation, there are
3030
#' several important considerations. `simulate_residuals()` relies on
3131
#' [`DHARMa::simulateResiduals()`], and additional arguments specified in `...`
3232
#' are passed further down to that function. The defaults in DHARMa are set on
@@ -79,7 +79,7 @@ print.performance_simres <- function(x, ...) {
7979
msg <- paste0(
8080
"Simulated residuals from a model of class `", class(x$fittedModel)[1],
8181
"` based on ", x$nSim, " simulations. Use `check_residuals()` to check ",
82-
"uniformity of residuals. It is recommended to refer to `?DHARMa::simulateReisudals`",
82+
"uniformity of residuals. It is recommended to refer to `?DHARMa::simulateResiudals`",
8383
" and `vignette(\"DHARMa\")` for more information about different settings",
8484
" in particular situations or for particular models.\n"
8585
)

man/check_overdispersion.Rd

+13-12
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/check_residuals.Rd

+2-2
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/check_zeroinflation.Rd

+13-12
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/simulate_residuals.Rd

+2-2
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

tests/testthat/test-check_model.R

+11
Original file line numberDiff line numberDiff line change
@@ -69,3 +69,14 @@ test_that("`check_model()` warnings for tweedie", {
6969
)
7070
)
7171
})
72+
73+
74+
test_that("`check_model()` warnings for zero-infl", {
75+
skip_if_not_installed("pscl")
76+
data(bioChemists, package = "pscl")
77+
model <- pscl::zeroinfl(
78+
art ~ fem + mar + kid5 + ment | kid5 + phd,
79+
data = bioChemists
80+
)
81+
expect_message(expect_warning(check_model(model, verbose = TRUE), regex = "Cannot simulate"), regex = "Homogeneity")
82+
})

0 commit comments

Comments
 (0)