inferences cleanup

vincentarelbundock · vincentarelbundock · commit 4a49dfcb9f39 · 2025-11-14T11:51:53.000-05:00
diff --git a/R/inferences.R b/R/inferences.R
@@ -137,6 +137,12 @@ inferences <- function(
     # dummy mfx for `estimator` and no marginaleffects object
     if (!inherits(mfx, "marginaleffects_internal")) {
         mfx <- new_marginaleffects_internal(NULL, call("predictions"))
+        if (isTRUE(checkmate::check_data_frame(x))) {
+            mfx@modeldata <- x
+            mfx@modeldata_available <- TRUE
+        } else {
+            mfx@modeldata_available <- FALSE
+        }
     }
 
     # supported classes
@@ -174,13 +180,18 @@ inferences <- function(
     )
 
     if (is.null(data_train)) {
-        if (!isTRUE(mfx@modeldata_available) && method != "conformal_split") {
+        if (!isTRUE(mfx@modeldata_available)) {
             checkmate::assert_data_frame(data_train, null.ok = FALSE)
         } else {
             data_train <- mfx@modeldata
         }
     }
 
+    if (is.null(data_test)) {
+        checkmate::assert_data_frame(mfx@newdata)
+        data_test <- mfx@newdata
+    }
+
     if (inherits(mfx@model, c("Learner", "model_fit", "workflow"))) {
         if (method == "simulation") {
             msg <- "Simulation-based inference is not supported for this class."
@@ -218,6 +229,7 @@ inferences <- function(
             conf_level = conf_level,
             conf_type = conf_type,
             estimator = estimator,
+            data_train = data_train,
             mfx = mfx,
             ...)
     } else if (method == "fwb") {
@@ -236,6 +248,7 @@ inferences <- function(
             conf_level = conf_level,
             conf_type = conf_type,
             estimator = estimator,
+            data_train = data_train,
             mfx = mfx,
             ...)
     } else if (method == "simulation") {
@@ -246,12 +259,11 @@ inferences <- function(
             mfx = mfx,
             ...)
     } else if (isTRUE(grepl("conformal", method))) {
-        data_test <- sanity_inferences_conformal(
+        sanity_inferences_conformal(
             mfx = mfx,
             score = conformal_score,
             method = method,
             data_calib = data_calib,
-            data_test = data_test,
             R = R
         )
 
diff --git a/R/inferences_boot.R b/R/inferences_boot.R
@@ -1,4 +1,4 @@
-inferences_boot <- function(x, R = 1000, conf_level = 0.95, conf_type = "perc", estimator = NULL, mfx = NULL, ...) {
+inferences_boot <- function(x, R = 1000, conf_level = 0.95, conf_type = "perc", estimator = NULL, data_train = NULL, mfx = NULL, ...) {
     insight::check_if_installed("boot")
 
     out <- x
@@ -17,7 +17,7 @@ inferences_boot <- function(x, R = 1000, conf_level = 0.95, conf_type = "perc",
         }
     }
 
-    args <- list("data" = mfx@modeldata, "statistic" = bootfun, R = R)
+    args <- list("data" = data_train, "statistic" = bootfun, R = R)
     args <- c(args, list(...))
     B <- do.call(boot::boot, args)
 
diff --git a/R/inferences_rsample.R b/R/inferences_rsample.R
@@ -1,11 +1,8 @@
-inferences_rsample <- function(x, R = 1000, conf_level = 0.95, conf_type = "perc", estimator = NULL, mfx = NULL, ...) {
+inferences_rsample <- function(x, R = 1000, conf_level = 0.95, conf_type = "perc", estimator = NULL, data_train = NULL, mfx = NULL, ...) {
     insight::check_if_installed("rsample")
 
     out <- x
 
-    # Get modeldata from mfx object
-    modeldata <- mfx@modeldata
-
     if (!is.null(estimator)) {
         bootfun <- function(split, ...) {
             d <- rsample::analysis(split)
@@ -38,15 +35,7 @@ inferences_rsample <- function(x, R = 1000, conf_level = 0.95, conf_type = "perc
 
     args <- list("apparent" = TRUE)
     args[["times"]] <- R
-
-    # Sometimes modeldata is empty (ex: `tidymodels`)
-    if (nrow(modeldata) > 0) {
-        args[["data"]] <- modeldata
-    } else if (nrow(mfx@modeldata) > 0) {
-        args[["data"]] <- mfx@modeldata
-    } else {
-        args[["data"]] <- mfx@newdata
-    }
+    args[["data"]] <- data_train
 
     args <- c(args, list(...))
     if ("group" %in% ...names()) {
diff --git a/R/refit.R b/R/refit.R
@@ -30,28 +30,37 @@ refit.marginaleffects <- function(object, data = NULL, newdata = NULL, vcov = NU
 
     model <- mfx@model
 
+    # Refit `model` on `data`; reads `mfx@call_model` from the enclosing function.
+    fit_again <- function(model, data) {
+        # First attempt: stats::update(); an error becomes NULL to trigger the fallback.
+        model <- tryCatch(stats::update(model, data = data), error = function(e) NULL)
+        # Fallback: swap `data` into the stored model call and re-evaluate it.
+        if (is.null(model)) {
+            if (is.call(mfx@call_model) && "data" %in% names(mfx@call_model)) {
+                call_new <- mfx@call_model
+                call_new$data <- data
+                model <- try(eval(call_new), silent = TRUE)
+                if (inherits(model, "try-error")) {
+                    # Surface the underlying cause instead of discarding it.
+                    cause <- conditionMessage(attr(model, "condition"))
+                    stop("Failed to refit the model: ", cause, call. = FALSE)
+                }
+            } else {
+                stop("Failed to refit model: no update method available", call. = FALSE)
+            }
+        }
+        return(model)
+    }
+
     # Step 1: Refit model if data is supplied
     if (!is.null(data)) {
         # For workflows, tidymodels provides its own fit.workflow method
         if (inherits(model, "workflow")) {
             model <- generics::fit(model, data = data)
+        } else if (inherits(model, "model_fit")) {
+            model <- fit_again(model[["fit"]], data = data)
         } else {
-            # Try stats::update first
-            model <- tryCatch(
-                stats::update(model, data = data),
-                error = function(e) NULL
-            )
-
-            # Fallback: modify call and re-evaluate
-            if (is.null(model)) {
-                if (is.call(mfx@call_model) && "data" %in% names(mfx@call_model)) {
-                    call_new <- mfx@call_model
-                    call_new$data <- data
-                    model <- eval(call_new)
-                } else {
-                    stop("Failed to refit model: no update method available")
-                }
-            }
+            model <- fit_again(model, data = data)
         }
     }
 
diff --git a/R/sanity_inferences.R b/R/sanity_inferences.R
@@ -1,4 +1,4 @@
-sanity_inferences_conformal <- function(mfx, score, method, data_calib, data_test, R) {
+sanity_inferences_conformal <- function(mfx, score, method, data_calib, R) {
     checkmate::assert_choice(
         score,
         choices = c("residual_abs", "residual_sq", "softmax")
@@ -56,11 +56,6 @@ sanity_inferences_conformal <- function(mfx, score, method, data_calib, data_tes
         }
     }
 
-    if (is.null(data_test)) {
-        checkmate::assert_data_frame(mfx@newdata)
-        data_test <- mfx@newdata
-    }
-
     if (method %in% c("conformal_split", "conformal_quantile")) {
         checkmate::assert_data_frame(data_calib, null.ok = FALSE)
     }
@@ -74,7 +69,7 @@ sanity_inferences_conformal <- function(mfx, score, method, data_calib, data_tes
         stop_sprintf(msg)
     }
 
-    return(data_test)
+    return(invisible(NULL))
 }
 
 
diff --git a/inst/tinytest/test-inferences_rsample.R b/inst/tinytest/test-inferences_rsample.R
@@ -40,7 +40,7 @@ x <- mod |>
     components("inferences") |>
     suppressWarnings()
 expect_inherits(x, "bootstraps")
-nd <<- datagrid(Sepal.Length = range, model = mod)
+nd <- datagrid(Sepal.Length = range, model = mod)
 x <- mod |>
     comparisons(variables = "Sepal.Width", newdata = nd) |>
     inferences(method = "rsample", R = R) |>
@@ -82,7 +82,7 @@ model <- coxph(
     Surv(dtime, death) ~ hormon * factor(grade) + ns(age, df = 2),
     data = rotterdam
 )
-nd <<- datagrid(
+nd <- datagrid(
     hormon = unique,
     grade = unique,
     dtime = seq(36, 7043, length.out = 25),
diff --git a/inst/tinytest/test-pkg-fixest.R b/inst/tinytest/test-pkg-fixest.R
@@ -149,7 +149,7 @@ dt <- mtcars
 dt$cyl <- factor(dt$cyl)
 fit1 <- suppressMessages(feols(mpg ~ 0 | carb | vs ~ am, data = dt))
 fit2 <- suppressMessages(feols(mpg ~ cyl | carb | vs ~ am, data = dt))
-fit3 <- suppressMessages(feols(mpg ~ 0 | carb | vs:cyl ~ am:cyl, data = dt))
+fit3 <- suppressWarnings(feols(mpg ~ 0 | carb | vs:cyl ~ am:cyl, data = dt))
 mfx1 <- slopes(fit1)
 mfx2 <- slopes(fit2)
 mfx3 <- slopes(fit3)
diff --git a/inst/tinytest/test-pkg-tidymodels.R b/inst/tinytest/test-pkg-tidymodels.R
@@ -38,14 +38,17 @@ expect_true(nrow(mfx) > 0)
 
 
 # conformal
+set.seed(48103)
 dat <- get_dataset("penguins", "palmerpenguins") |> na.omit()
+dat <- dat[sample(seq_len(nrow(dat)), nrow(dat)), ] # shuffle rows (and hence species) before slicing
 mod <- set_engine(linear_reg(), "lm") |>
     fit(body_mass_g ~ bill_length_mm + flipper_length_mm + species,
         data = na.omit(dat))
 p <- predictions(mod, newdata = dat[1:100, ]) |>
     inferences(
         R = 3,
         method = "conformal_cv+",
+        data_train = dat[1:100, ],
         data_calib = dat[101:nrow(dat), ]
     )
 expect_inherits(p, "predictions")
@@ -65,6 +68,7 @@ expect_true("std.error" %in% colnames(p))
 p <- predictions(mod, newdata = bikes[1:200, ], vcov = FALSE) |>
     inferences(
         method = "conformal_split",
+        data_train = bikes[1:200, ],
         data_calib = bikes[201:nrow(bikes), ])
 expect_inherits(p, "predictions")
 
@@ -123,3 +127,39 @@ lr_fit <- lr_wf |>
 mfx <- slopes(lr_fit, newdata = my_data, variable = "x")
 expect_equivalent(mfx$x, my_data$x)
 expect_equivalent(mfx$y, my_data$y)
+
+
+# Bootstrap: inferences(method = "rsample") on a fitted workflow must add `conf.low`
+set.seed(48103)
+nobs <- 50 # NOTE(review): not referenced by any other line in this hunk; confirm it is needed
+wf <- workflow() |>
+    add_model(boost_tree(mode = "regression")) |>
+    add_recipe(
+        recipe(Sepal.Length ~ ., data = iris) |>
+            # 1. Convert character predictors to factors (if any)
+            step_string2factor(all_nominal_predictors()) |>
+            # 2. Dummy-code all nominal predictors
+            step_dummy(all_nominal_predictors())
+    ) |>
+    fit(iris)
+mfx1 <- comparisons(wf, newdata = iris, variable = "Sepal.Width", vcov = FALSE)
+mfx2 <- inferences(mfx1, R = 100, method = "rsample", data_train = iris) |>
+    suppressWarnings()
+expect_false("conf.low" %in% colnames(mfx1))
+expect_true("conf.low" %in% colnames(mfx2))
+
+
+# Bootstrap works for some supported models but not all: boost_tree must error with "Failed to refit", linear_reg must return intervals
+z <- boost_tree("regression") |>
+    fit(hp ~ ., data = mtcars)
+comparisons(z, variables = "mpg", newdata = mtcars, vcov = FALSE) |>
+    inferences(R = 10, method = "boot", data_train = mtcars) |>
+    suppressWarnings() |>
+    expect_error(pattern = "Failed to refit")
+z <- linear_reg() |>
+    fit(hp ~ ., data = mtcars)
+cmp <- comparisons(z, variables = "mpg", newdata = mtcars, vcov = FALSE) |>
+    inferences(R = 10, method = "boot", data_train = mtcars) |>
+    suppressWarnings()
+expect_inherits(cmp, "comparisons")
+expect_true("conf.low" %in% colnames(cmp))

Original file line number	Diff line number	Diff line change
`@@ -1,4 +1,4 @@`
`1`		`-inferences_boot <- function(x, R = 1000, conf_level = 0.95, conf_type = "perc", estimator = NULL, mfx = NULL, ...) {`
	`1`	`+inferences_boot <- function(x, R = 1000, conf_level = 0.95, conf_type = "perc", estimator = NULL, data_train = NULL, mfx = NULL, ...) {`
`2`	`2`	`insight::check_if_installed("boot")`
`3`	`3`
`4`	`4`	`out <- x`
`@@ -17,7 +17,7 @@ inferences_boot <- function(x, R = 1000, conf_level = 0.95, conf_type = "perc",`
`17`	`17`	`}`
`18`	`18`	`}`
`19`	`19`
`20`		`- args <- list("data" = mfx@modeldata, "statistic" = bootfun, R = R)`
	`20`	`+ args <- list("data" = data_train, "statistic" = bootfun, R = R)`
`21`	`21`	`args <- c(args, list(...))`
`22`	`22`	`B <- do.call(boot::boot, args)`
`23`	`23`
Original file line number	Diff line number	Diff line change
`@@ -1,4 +1,4 @@`
`1`		`-sanity_inferences_conformal <- function(mfx, score, method, data_calib, data_test, R) {`
	`1`	`+sanity_inferences_conformal <- function(mfx, score, method, data_calib, R) {`
`2`	`2`	`checkmate::assert_choice(`
`3`	`3`	`score,`
`4`	`4`	`choices = c("residual_abs", "residual_sq", "softmax")`
`@@ -56,11 +56,6 @@ sanity_inferences_conformal <- function(mfx, score, method, data_calib, data_tes`
`56`	`56`	`}`
`57`	`57`	`}`
`58`	`58`
`59`		`- if (is.null(data_test)) {`
`60`		`- checkmate::assert_data_frame(mfx@newdata)`
`61`		`- data_test <- mfx@newdata`
`62`		`- }`
`63`		`-`
`64`	`59`	`if (method %in% c("conformal_split", "conformal_quantile")) {`
`65`	`60`	`checkmate::assert_data_frame(data_calib, null.ok = FALSE)`
`66`	`61`	`}`
`@@ -74,7 +69,7 @@ sanity_inferences_conformal <- function(mfx, score, method, data_calib, data_tes`
`74`	`69`	`stop_sprintf(msg)`
`75`	`70`	`}`
`76`	`71`
`77`		`- return(data_test)`
	`72`	`+ return(invisible(NULL))`
`78`	`73`	`}`
`79`	`74`
`80`	`75`