R2 incorrect for methods that use update(model, ~1) and have missing data (#804)

strengejacke · web-flow · commit 3886b63378b7 · 2025-03-21T15:30:08.000+01:00
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,7 +1,7 @@
 Type: Package
 Package: performance
 Title: Assessment of Regression Models Performance
-Version: 0.13.0.1
+Version: 0.13.0.2
 Authors@R:
     c(person(given = "Daniel",
              family = "Lüdecke",
@@ -126,6 +126,7 @@ Suggests:
     multimode,
     nestedLogit,
     nlme,
+    nnet,
     nonnest2,
     ordinal,
     parallel,
@@ -161,3 +162,4 @@ Config/Needs/website:
     r-lib/pkgdown,
     easystats/easystatstemplate
 Config/rcmdcheck/ignore-inconsequential-notes: true
+Remotes: easystats/insight
diff --git a/NEWS.md b/NEWS.md
@@ -8,6 +8,14 @@
   the full model), or can return singularity checks for each random effects term
   separately.
 
+## Bug fixes
+
+* Fixed wrong pseudo-R2 for some models fitted to data with missing values in
+  predictors. The base-model (null model) was refitted on the original data and
+  could thus include more observations than the full model. Now the model frame
+  is used, ensuring the correct number of observations in the base-model, the
+  correct log-likelihood and hence the correct pseudo-R2.
+
 # performance 0.13.0
 
 ## Breaking changes
diff --git a/R/r2_coxsnell.R b/R/r2_coxsnell.R
@@ -218,25 +218,20 @@ r2_coxsnell.svycoxph <- function(model, ...) {
 
 #' @export
 r2_coxsnell.multinom <- function(model, ...) {
-  l_base <- insight::get_loglikelihood(stats::update(model, ~1, trace = FALSE))
+  l_base <- insight::get_loglikelihood(insight::null_model(model))
   .r2_coxsnell(model, l_base)
 }
 
 #' @export
-r2_coxsnell.clm2 <- function(model, ...) {
-  l_base <- insight::get_loglikelihood(stats::update(model, location = ~1, scale = ~1))
-  .r2_coxsnell(model, l_base)
-}
+r2_coxsnell.clm2 <- r2_coxsnell.multinom
 
 #' @export
-r2_coxsnell.bayesx <- function(model, ...) {
-  junk <- utils::capture.output(l_base <- insight::get_loglikelihood(stats::update(model, ~1))) # nolint
-  .r2_coxsnell(model, l_base)
-}
+r2_coxsnell.bayesx <- r2_coxsnell.multinom
 
 #' @export
 r2_coxsnell.clm <- function(model, ...) {
-  l_base <- insight::get_loglikelihood(stats::update(model, ~1))
+  l_base <- insight::get_loglikelihood(insight::null_model(model))
+
   # if no loglik, return NA
   if (length(as.numeric(l_base)) == 0) {
     return(NULL)
diff --git a/R/r2_mcfadden.R b/R/r2_mcfadden.R
@@ -73,7 +73,7 @@ r2_mcfadden.glm <- function(model, verbose = TRUE, ...) {
     return(NULL)
   }
 
-  l_null <- insight::get_loglikelihood(stats::update(model, ~1))
+  l_null <- insight::get_loglikelihood(insight::null_model(model))
   .r2_mcfadden(model, l_null)
 }
 
@@ -162,23 +162,20 @@ r2_mcfadden.vglm <- function(model, ...) {
     insight::format_error("Can't get log-likelihood when `summ` is not zero.")
   }
 
-  l_null <- insight::get_loglikelihood(stats::update(model, ~1))
+  l_null <- insight::get_loglikelihood(insight::null_model(model))
   .r2_mcfadden(model, l_null)
 }
 
 
 #' @export
 r2_mcfadden.clm2 <- function(model, ...) {
-  l_null <- insight::get_loglikelihood(stats::update(model, location = ~1, scale = ~1))
+  l_null <- insight::get_loglikelihood(insight::null_model(model))
   .r2_mcfadden(model, l_null)
 }
 
 
 #' @export
-r2_mcfadden.multinom <- function(model, ...) {
-  l_null <- insight::get_loglikelihood(stats::update(model, ~1, trace = FALSE))
-  .r2_mcfadden(model, l_null)
-}
+r2_mcfadden.multinom <- r2_mcfadden.clm2
 
 
 #' @export
diff --git a/R/r2_mckelvey.R b/R/r2_mckelvey.R
@@ -74,13 +74,8 @@ r2_mckelvey.default <- function(model) {
 }
 
 
-.null_model <- function(model) {
-  stats::update(model, ~1)
-}
-
-
 .get_poisson_variance <- function(model) {
-  mu <- exp(stats::coef(.null_model(model)))
+  mu <- exp(stats::coef(insight::null_model(model)))
   if (is.na(mu)) {
     return(0)
   }
diff --git a/R/r2_nagelkerke.R b/R/r2_nagelkerke.R
@@ -170,19 +170,19 @@ r2_nagelkerke.negbinmfx <- r2_nagelkerke.logitmfx
 
 #' @export
 r2_nagelkerke.multinom <- function(model, ...) {
-  l_base <- insight::get_loglikelihood(stats::update(model, ~1, trace = FALSE))
+  l_base <- insight::get_loglikelihood(insight::null_model(model))
   .r2_nagelkerke(model, l_base)
 }
 
 #' @export
 r2_nagelkerke.clm2 <- function(model, ...) {
-  l_base <- insight::get_loglikelihood(stats::update(model, location = ~1, scale = ~1))
+  l_base <- insight::get_loglikelihood(insight::null_model(model))
   .r2_nagelkerke(model, l_base)
 }
 
 #' @export
 r2_nagelkerke.clm <- function(model, ...) {
-  l_base <- insight::get_loglikelihood(stats::update(model, ~1))
+  l_base <- insight::get_loglikelihood(insight::null_model(model))
   # if no loglik, return NA
   if (length(as.numeric(l_base)) == 0) {
     return(NULL)
diff --git a/tests/testthat/test-check_singularity.R b/tests/testthat/test-check_singularity.R
@@ -45,13 +45,13 @@ test_that("check_singularity", {
   expect_true(check_singularity(m2))
 
   data(Salamanders, package = "glmmTMB")
-  m <- glmmTMB::glmmTMB(
+  m <- suppressWarnings(glmmTMB::glmmTMB(
     count ~ spp + mined + (1 | site),
     data = Salamanders[Salamanders$count > 0, , drop = FALSE],
     family = glmmTMB::truncated_nbinom2(),
     ziformula = ~ spp + (1 | site),
     dispformula = ~ spp + (1 | site)
-  )
+  ))
   out <- check_singularity(m, check = "terms")
   expect_identical(
     out,
diff --git a/tests/testthat/test-r2_nagelkerke.R b/tests/testthat/test-r2_nagelkerke.R
@@ -17,3 +17,36 @@ test_that("r2_nagelkerke", {
     }
   )
 })
+
+test_that("r2_nagelkerke, multinom, correct base-model with NA", {
+  skip_on_cran()
+  skip_if_not_installed("nnet")
+
+  n_obs <- 1000
+  softmax <- function(x) {
+    exp(x - max(x)) / sum(exp(x - max(x)))
+  }
+  sample_y <- function(x) {
+    sample(1:3, size = 1, prob = softmax(c(0.25 * x, -0.1 * x, 0 * x)))
+  }
+  set.seed(123)
+  sim_df <- data.frame(x = rnorm(n_obs, 0, 1), y = NA)
+  # draw one outcome per row; seq_len() also behaves for a zero-row frame
+  for (i in seq_len(nrow(sim_df))) {
+    sim_df$y[i] <- sample_y(sim_df$x[i])
+  }
+  # blank half of the predictor: m1 sees the NA rows, m2 only complete cases
+  sim_df$x[1:500] <- NA
+  sim_df2 <- sim_df[!is.na(sim_df$x), ]
+
+  m1 <- nnet::multinom(y ~ x, data = sim_df, trace = FALSE)
+  m2 <- nnet::multinom(y ~ x, data = sim_df2, trace = FALSE)
+  # both fits should rest on the same 500 rows, so the pseudo-R2 must agree
+  out1 <- r2_nagelkerke(m1)
+  out2 <- r2_nagelkerke(m2)
+  expect_equal(out1, out2, tolerance = 1e-4, ignore_attr = TRUE)
+
+  out1 <- r2_mcfadden(m1)
+  out2 <- r2_mcfadden(m2)
+  expect_equal(out1$R2, out2$R2, tolerance = 1e-4, ignore_attr = TRUE)
+})

Original file line number	Diff line number	Diff line change
`@@ -73,7 +73,7 @@ r2_mcfadden.glm <- function(model, verbose = TRUE, ...) {`
`73`	`73`	`return(NULL)`
`74`	`74`	`}`
`75`	`75`
`76`		`- l_null <- insight::get_loglikelihood(stats::update(model, ~1))`
	`76`	`+ l_null <- insight::get_loglikelihood(insight::null_model(model))`
`77`	`77`	`.r2_mcfadden(model, l_null)`
`78`	`78`	`}`
`79`	`79`
`@@ -162,23 +162,20 @@ r2_mcfadden.vglm <- function(model, ...) {`
`162`	`162`	``insight::format_error("Can't get log-likelihood when `summ` is not zero.")``
`163`	`163`	`}`
`164`	`164`
`165`		`- l_null <- insight::get_loglikelihood(stats::update(model, ~1))`
	`165`	`+ l_null <- insight::get_loglikelihood(insight::null_model(model))`
`166`	`166`	`.r2_mcfadden(model, l_null)`
`167`	`167`	`}`
`168`	`168`
`169`	`169`
`170`	`170`	`#' @export`
`171`	`171`	`r2_mcfadden.clm2 <- function(model, ...) {`
`172`		`- l_null <- insight::get_loglikelihood(stats::update(model, location = ~1, scale = ~1))`
	`172`	`+ l_null <- insight::get_loglikelihood(insight::null_model(model))`
`173`	`173`	`.r2_mcfadden(model, l_null)`
`174`	`174`	`}`
`175`	`175`
`176`	`176`
`177`	`177`	`#' @export`
`178`		`-r2_mcfadden.multinom <- function(model, ...) {`
`179`		`- l_null <- insight::get_loglikelihood(stats::update(model, ~1, trace = FALSE))`
`180`		`- .r2_mcfadden(model, l_null)`
`181`		`-}`
	`178`	`+r2_mcfadden.multinom <- r2_mcfadden.clm2`
`182`	`179`
`183`	`180`
`184`	`181`	`#' @export`
Original file line number	Diff line number	Diff line change
`@@ -74,13 +74,8 @@ r2_mckelvey.default <- function(model) {`
`74`	`74`	`}`
`75`	`75`
`76`	`76`
`77`		`-.null_model <- function(model) {`
`78`		`- stats::update(model, ~1)`
`79`		`-}`
`80`		`-`
`81`		`-`
`82`	`77`	`.get_poisson_variance <- function(model) {`
`83`		`- mu <- exp(stats::coef(.null_model(model)))`
	`78`	`+ mu <- exp(stats::coef(insight::null_model(model)))`
`84`	`79`	`if (is.na(mu)) {`
`85`	`80`	`return(0)`
`86`	`81`	`}`