prep for cran 1.1.0

mdancho84 · mdancho84 · commit df1dedcef716 · 2025-09-03T12:19:58.000-04:00
diff --git a/CRAN-SUBMISSION b/CRAN-SUBMISSION
@@ -1,3 +1,3 @@
-Version: 1.0.4
-Date: 2024-07-16 21:37:58 UTC
-SHA: eeb0f2859266149755cc1dc74e13edbafc372568
+Version: 1.0.5
+Date: 2025-08-28 15:36:47 UTC
+SHA: 8972baccbed49fbf5edf54a489c1b01edeb9459d
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,7 +1,7 @@
 Package: modeltime.ensemble
 Type: Package
 Title: Ensemble Algorithms for Time Series Forecasting with Modeltime
-Version: 1.0.5.9000
+Version: 1.1.0
 Authors@R: c(
     person("Matt", "Dancho", email = "mdancho@business-science.io", role = c("aut", "cre")),
     person("Business Science", role = "cph")
@@ -16,10 +16,10 @@ License: MIT + file LICENSE
 Encoding: UTF-8
 Depends: 
     modeltime (>= 1.2.3),
-    modeltime.resample (>= 0.2.1),
+    modeltime.resample (>= 0.3.0),
     R (>= 3.5)
 Imports: 
-    tune (>= 0.1.2),
+    tune (>= 2.0.0),
     rsample,
     yardstick,
     workflows (>= 0.2.1),
diff --git a/R/ensemble_model_spec.R b/R/ensemble_model_spec.R
@@ -233,64 +233,66 @@ generate_stacking_results <- function(object,
                                       grid          = 6,
                                       control       = control_grid()) {
 
-    # 1. Fit Resamples ----
-    # - This is now performed separately with modeltime_fit_resamples()
+    if (control$verbose) tictoc::tic()
 
     # 2. Wrangle Predictions ----
     predictions_tbl <- modeltime.resample::unnest_modeltime_resamples(object)
 
-    # Target Variable is the name in the data
+    # Target variable name comes right after .model_desc (new tune) or .row (old tune)
     if (utils::packageVersion("tune") >= "1.3.0.9006") {
         target_text <- predictions_tbl %>%
             modeltime.resample::get_target_text_from_resamples(column_before_target = ".model_desc")
     } else {
         target_text <- predictions_tbl %>%
             modeltime.resample::get_target_text_from_resamples(column_before_target = ".row")
     }
-    target_var  <- rlang::sym(target_text)
+    target_var <- rlang::sym(target_text)
 
+    # Keep resample id so keys are unique across slices
     predictions_tbl <- predictions_tbl %>%
-        dplyr::select(.row_id, .model_id, .pred, !! target_var)
+        dplyr::select(.resample_id, .row_id, .model_id, .pred, !!target_var)
 
-    # * Actuals By Row ID ----
+    # Coerce list-column predictions to a numeric vector: first element of each entry, NA if empty (can arise when duplicates exist pre-pivot)
+    if (is.list(predictions_tbl$.pred)) {
+        predictions_tbl <- predictions_tbl %>%
+            dplyr::mutate(.pred = purrr::map_dbl(.pred, ~ if (length(.x)) as.numeric(.x)[1] else NA_real_))
+    }
+
+    # * Actuals: one row per resample + row id
     actuals_by_rowid_tbl <- predictions_tbl %>%
-        dplyr::filter(.model_id %in% unique(.model_id)[1]) %>%
-        dplyr::select(.row_id, !! target_var)
+        dplyr::distinct(.resample_id, .row_id, !!target_var)
 
-    # * Get Predictions by Row ID ----
+    # * Predictions wide: id by resample + row id; columns per model
     predictions_by_rowid_tbl <- predictions_tbl %>%
-        dplyr::select(.row_id, .model_id, .pred) %>%
+        dplyr::select(.resample_id, .row_id, .model_id, .pred) %>%
         dplyr::mutate(.model_id = stringr::str_c(".model_id_", .model_id)) %>%
         tidyr::pivot_wider(
-            names_from  = .model_id,
+            id_cols    = c(.resample_id, .row_id),
+            names_from = .model_id,
             values_from = .pred
         )
 
-    # * Join Actuals & Predictions ----
+    # * Join Actuals & Predictions
     data_prepared_tbl <- actuals_by_rowid_tbl %>%
-        dplyr::left_join(predictions_by_rowid_tbl, by = ".row_id")
+        dplyr::left_join(predictions_by_rowid_tbl, by = c(".resample_id", ".row_id"))
 
     # 3. Build Model ----
-
     form <- stats::formula(stringr::str_glue("{target_text} ~ ."))
 
     recipe_spec <- recipes::recipe(
         formula = form,
-        data    = data_prepared_tbl %>% dplyr::select(-.row_id)
+        data    = data_prepared_tbl %>% dplyr::select(-.resample_id, -.row_id)
     )
 
     wflw_spec <- workflows::workflow() %>%
         workflows::add_model(model_spec) %>%
         workflows::add_recipe(recipe_spec)
 
-    # **** Split Paths (Tuned vs Non-Tuned) **** ----
+    # Tuned vs non-tuned paths
+    tune_args_tbl    <- wflw_spec %>% tune::tune_args()
+    tuning_required  <- nrow(tune_args_tbl) > 0
 
-    tune_args_tbl <- wflw_spec %>% tune::tune_args()
-    tuning_required <- nrow(tune_args_tbl) > 0
-
-    # 4A. Tune Model ----
     if (tuning_required) {
-
         if (control$verbose) {
             print(cli::rule("Tuning Model Specification", width = 65))
             cli::cli_alert_info(stringr::str_glue("Performing {kfolds}-Fold Cross Validation."))
@@ -326,37 +328,26 @@ generate_stacking_results <- function(object,
         }
 
         final_model <- wflw_spec %>%
-            tune::finalize_workflow(
-                best_params_tbl
-            ) %>%
+            tune::finalize_workflow(best_params_tbl) %>%
             generics::fit(data_prepared_tbl)
 
-    }
-
-    # 4B. No Tuning -----
-    if (!tuning_required) {
+    } else {
 
         if (control$verbose) {
             print(cli::rule("Fitting Non-Tunable Model Specification", width = 65))
-            cli::cli_alert_info(stringr::str_glue("Fitting model spec to submodel cross-validation predictions."))
+            cli::cli_alert_info("Fitting model spec to submodel cross-validation predictions.")
             cli::cat_line()
         }
 
         best_params_tbl <- NULL
 
         final_model <- wflw_spec %>%
             generics::fit(data_prepared_tbl)
-
     }
 
-
-
     # 5. Fit Best Model ----
-
     pred_tbl <- data_prepared_tbl %>%
-        dplyr::bind_cols(
-            stats::predict(final_model, data_prepared_tbl)
-        )
+        dplyr::bind_cols(stats::predict(final_model, data_prepared_tbl))
 
     cv_comparison_tbl <- pred_tbl %>%
         dplyr::rename(.model_id_ensemble = .pred) %>%
@@ -366,15 +357,15 @@ generate_stacking_results <- function(object,
             values_to = ".preds"
         ) %>%
         dplyr::group_by(.model_id) %>%
-        dplyr::summarise(rmse = yardstick::rmse_vec(!! target_var, .preds), .groups = "drop") %>%
+        dplyr::summarise(rmse = yardstick::rmse_vec(!!target_var, .preds), .groups = "drop") %>%
         dplyr::mutate(.model_id = stringr::str_remove(.model_id, ".model_id_")) %>%
         dplyr::left_join(
             object %>%
                 dplyr::select(.model_id, .model_desc) %>%
                 dplyr::mutate(.model_id = as.character(.model_id)),
             by = ".model_id"
         ) %>%
-        dplyr::mutate(.model_desc = ifelse(is.na(.model_desc), "ENSEMBLE (MODEL SPEC)", .model_desc))
+        dplyr::mutate(.model_desc = dplyr::if_else(is.na(.model_desc), "ENSEMBLE (MODEL SPEC)", .model_desc))
 
     if (control$verbose) {
         cli::cli_alert_info("Prediction Error Comparison:")
@@ -383,27 +374,22 @@ generate_stacking_results <- function(object,
     }
 
     if (control$verbose) print(cli::rule("Final Model", width = 65))
-
     if (control$verbose) {
-
         cli::cat_line()
         cli::cli_alert_info("Model Workflow:")
         print(final_model)
         cli::cat_line()
     }
 
-    # Return ----
-    ret <- list(
+    list(
         fit                  = final_model,
         fit_params           = best_params_tbl,
         prediction_tbl       = pred_tbl,
         prediction_error_tbl = cv_comparison_tbl
     )
-
-    return(ret)
-
 }
 
 
 
 
+