Skip to content

Commit 365d281

Browse files
authored
Replace and homogenize messaging with {cli} (#182)
* improve hardhat messages * switch messages to cli_ messages * add cli to wordlist
1 parent a3107f2 commit 365d281

15 files changed

Lines changed: 379 additions & 308 deletions

File tree

NEWS.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
## New features
44

5+
* messaging improved with {cli}
56
* add optimal threshold and support size into new 1.5 alpha `entmax15()` and `sparsemax15()`
67
`mask_types`. Add an optional `mask_topk` config parameter. (#180)
78
* tabnet is now using the `torch_ignite_adam` when available.

R/dials.R

Lines changed: 2 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,8 @@
11
check_dials <- function() {
22
if (!requireNamespace("dials", quietly = TRUE))
3-
stop("Package \"dials\" needed for this function to work. Please install it.", call. = FALSE)
3+
runtime_error("Package {.pkg dials} is needed for this function to work. Please install it.")
44
}
55

6-
check_cli <- function() {
7-
if (!requireNamespace("cli", quietly = TRUE))
8-
stop("Package \"cli\" needed for this function to work. Please install it.", call. = FALSE)
9-
}
10-
11-
126

137
#' Parameters for the tabnet model
148
#'
@@ -91,7 +85,7 @@ mask_type <- function(values = c("sparsemax", "entmax")) {
9185
dials::new_qual_param(
9286
type = "character",
9387
values = values,
94-
label = c(mask_type = "Final layer of feature selector, either sparsemax or entmax"),
88+
label = c(mask_type = "Final layer of feature selector, either 'sparsemax' or 'entmax'"),
9589
finalize = NULL
9690
)
9791
}
@@ -145,7 +139,6 @@ num_steps <- function(range = c(3L, 10L), trans = NULL) {
145139
#' @rdname tabnet_non_tunable
146140
#' @export
147141
cat_emb_dim <- function(range = NULL, trans = NULL) {
148-
check_cli()
149142
cli::cli_abort("{.var cat_emb_dim} cannot be used as a {.fun tune} parameter yet.")
150143
}
151144

R/explain.R

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -44,9 +44,7 @@ tabnet_explain <- function(object, new_data) {
4444
#' @export
4545
#' @rdname tabnet_explain
4646
tabnet_explain.default <- function(object, new_data) {
47-
stop(domain=NA,
48-
gettextf("`tabnet_explain()` is not defined for a '%s'.", class(object)[1]),
49-
call. = FALSE)
47+
type_error("{.fn tabnet_explain} is not defined for a {.type {class(object)[1]}}.")
5048
}
5149

5250
#' @export

R/hardhat.R

Lines changed: 15 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -108,9 +108,7 @@ tabnet_fit <- function(x, ...) {
108108
#' @export
109109
#' @rdname tabnet_fit
110110
tabnet_fit.default <- function(x, ...) {
111-
stop(domain=NA,
112-
gettextf("`tabnet_fit()` is not defined for a '%s'.", class(x)[1]),
113-
call. = FALSE)
111+
type_error("{.fn tabnet_fit} is not defined for a {.type {class(x)[1]}}.")
114112
}
115113

116114
#' @export
@@ -267,9 +265,7 @@ tabnet_pretrain <- function(x, ...) {
267265
#' @export
268266
#' @rdname tabnet_pretrain
269267
tabnet_pretrain.default <- function(x, ...) {
270-
stop(domain=NA,
271-
gettextf("`tabnet_pretrain()` is not defined for a '%s'.", class(x)[1]),
272-
call. = FALSE)
268+
type_error("{.fn tabnet_pretrain} is not defined for a {.type {class(x)[1]}}.")
273269
}
274270

275271

@@ -348,14 +344,13 @@ tabnet_bridge <- function(processed, config = tabnet_config(), tabnet_model, fro
348344
epoch_shift <- 0L
349345

350346
if (!(is.null(tabnet_model) || inherits(tabnet_model, "tabnet_fit") || inherits(tabnet_model, "tabnet_pretrain")))
351-
stop(gettextf("'%s' is not recognised as a proper TabNet model", tabnet_model),
352-
call. = FALSE)
347+
type_error("{.var {tabnet_model}} is not recognised as a proper TabNet model")
353348

354349
if (!is.null(from_epoch) && !is.null(tabnet_model)) {
355350
# model must be loaded from checkpoint
356351

357352
if (from_epoch > (length(tabnet_model$fit$checkpoints) * tabnet_model$fit$config$checkpoint_epoch))
358-
stop(gettextf("The model was trained for less than '%s' epochs", from_epoch), call. = FALSE)
353+
value_error("The model was trained for less than {.val {from_epoch}} epochs")
359354

360355
# find closest checkpoint for that epoch
361356
closest_checkpoint <- from_epoch %/% tabnet_model$fit$config$checkpoint_epoch
@@ -367,7 +362,7 @@ tabnet_bridge <- function(processed, config = tabnet_config(), tabnet_model, fro
367362
}
368363
if (task == "supervised") {
369364
if (sum(is.na(outcomes)) > 0) {
370-
stop(gettextf("Found missing values in the `%s` outcome column.", names(outcomes)), call. = FALSE)
365+
value_error("Found missing values in the {.var {names(outcomes)}} outcome column.")
371366
}
372367
if (is.null(tabnet_model)) {
373368
# new supervised model needs network initialization
@@ -377,7 +372,7 @@ tabnet_bridge <- function(processed, config = tabnet_config(), tabnet_model, fro
377372
} else if (!check_net_is_empty_ptr(tabnet_model) && inherits(tabnet_model, "tabnet_fit")) {
378373
# resume training from supervised
379374
if (!identical(processed$blueprint, tabnet_model$blueprint))
380-
stop("Model dimensions don't match.", call. = FALSE)
375+
runtime_error("Model dimensions don't match.")
381376

382377
# model is available from tabnet_model$serialized_net
383378
m <- reload_model(tabnet_model$serialized_net)
@@ -402,15 +397,16 @@ tabnet_bridge <- function(processed, config = tabnet_config(), tabnet_model, fro
402397
tabnet_model$fit$network <- reload_model(tabnet_model$fit$checkpoints[[last_checkpoint]])
403398
epoch_shift <- last_checkpoint * tabnet_model$fit$config$checkpoint_epoch
404399

405-
} else stop(gettextf("No model serialized weight can be found in `%s`, check the model history", tabnet_model), call. = FALSE)
400+
} else runtime_error("No model serialized weight can be found in {.var {tabnet_model}}, check the model history")
406401

407402
fit_lst <- tabnet_train_supervised(tabnet_model, predictors, outcomes, config = config, epoch_shift)
408403
return(new_tabnet_fit(fit_lst, blueprint = processed$blueprint))
409404

410405
} else if (task == "unsupervised") {
411406

412407
if (!is.null(tabnet_model)) {
413-
warning("`tabnet_pretrain()` from a model is not currently supported.\nThe pretraining here will start with a network initialization")
408+
warn("Using {.fn tabnet_pretrain} from a model is not currently supported.
409+
Pretraining will start from a new network initialization")
414410
}
415411
pretrain_lst <- tabnet_train_unsupervised( predictors, config = config, epoch_shift)
416412
return(new_tabnet_pretrain(pretrain_lst, blueprint = processed$blueprint))
@@ -447,7 +443,7 @@ predict_tabnet_bridge <- function(type, object, predictors, epoch, batch_size) {
447443
if (!is.null(epoch)) {
448444

449445
if (epoch > (length(object$fit$checkpoints) * object$fit$config$checkpoint_epoch))
450-
stop(gettextf("The model was trained for less than `%s` epochs", epoch), call. = FALSE)
446+
value_error("The model was trained for less than {.val {epoch}} epochs")
451447

452448
# find closest checkpoint for that epoch
453449
ind <- epoch %/% object$fit$config$checkpoint_epoch
@@ -485,7 +481,7 @@ model_pretrain_to_fit <- function(obj, x, y, config = tabnet_config()) {
485481
m <- reload_model(obj$serialized_net)
486482

487483
if (m$input_dim != tabnet_model_lst$network$input_dim)
488-
stop("Model dimensions don't match.", call. = FALSE)
484+
runtime_error("Model dimensions don't match.")
489485

490486
# perform update of selected weights into new tabnet_model
491487
m_stat_dict <- m$state_dict()
@@ -523,25 +519,25 @@ check_type <- function(outcome_ptype, type = NULL) {
523519
outcome_all_numeric <- all(purrr::map_lgl(outcome_ptype, is.numeric))
524520

525521
if (!outcome_all_numeric && !outcome_all_factor)
526-
stop(gettextf("Mixed multi-outcome type '%s' is not supported", unique(purrr::map_chr(outcome_ptype, ~class(.x)[[1]]))), call. = FALSE)
522+
not_implemented_error("Mixed multi-outcome type {.type {unique(purrr::map_chr(outcome_ptype, ~class(.x)[[1]]))}} is not supported")
527523

528524
if (is.null(type)) {
529525
if (outcome_all_factor)
530526
type <- "class"
531527
else if (outcome_all_numeric)
532528
type <- "numeric"
533529
else if (ncol(outcome_ptype) == 1)
534-
stop(gettextf("Unknown outcome type '%s'", class(outcome_ptype)), call. = FALSE)
530+
type_error("Unknown outcome type {.type {class(outcome_ptype)}}")
535531
}
536532

537533
type <- rlang::arg_match(type, c("numeric", "prob", "class"))
538534

539535
if (outcome_all_factor) {
540536
if (!type %in% c("prob", "class"))
541-
stop(gettextf("Outcome is factor and the prediction type is '%s'.", type), call. = FALSE)
537+
type_error("Outcome is factor and the prediction type is {.type {type}}.")
542538
} else if (outcome_all_numeric) {
543539
if (type != "numeric")
544-
stop(gettextf("Outcome is numeric and the prediction type is '%s'.", type), call. = FALSE)
540+
type_error("Outcome is numeric and the prediction type is {.type {type}}.")
545541
}
546542

547543
invisible(type)

R/model.R

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -253,7 +253,7 @@ resolve_loss <- function(config, dtype) {
253253
# cross entropy loss is required
254254
loss_fn <- torch::nn_cross_entropy_loss()
255255
else
256-
stop(gettextf("`%s` is not a valid loss for outcome of type %s", loss, dtype), call. = FALSE)
256+
value_error("{.val {loss}} is not a valid loss for outcome of type {.type {dtype}}")
257257

258258
loss_fn
259259
}
@@ -264,7 +264,7 @@ resolve_early_stop_monitor <- function(early_stopping_monitor, valid_split) {
264264
else if (early_stopping_monitor %in% c("train_loss", "auto"))
265265
early_stopping_monitor <- "train_loss"
266266
else
267-
stop(gettextf("%s is not a valid early-stopping metric to monitor with `valid_split` = %s", early_stopping_monitor, valid_split), call. = FALSE)
267+
value_error("{.val {early_stopping_monitor}} is not a valid early-stopping metric to monitor with {.var valid_split} = {.val {valid_split}}")
268268

269269
early_stopping_monitor
270270
}
@@ -516,11 +516,11 @@ tabnet_train_supervised <- function(obj, x, y, config = tabnet_config(), epoch_s
516516
config$ancestor_tt <- torch::torch_tensor(config$ancestor)$to(torch::torch_bool(), device = device)
517517
}
518518

519-
# instanciate optimizer
519+
# instantiate optimizer
520520
if (is_optim_generator(config$optimizer)) {
521521
optimizer <- config$optimizer(network$parameters, config$learn_rate)
522522
} else {
523-
stop("`optimizer` must be resolved into a torch optimizer generator.", call. = FALSE)
523+
type_error("{.var optimizer} must be resolved into a torch optimizer generator.")
524524
}
525525

526526
# define scheduler
@@ -533,7 +533,7 @@ tabnet_train_supervised <- function(obj, x, y, config = tabnet_config(), epoch_s
533533
} else if (config$lr_scheduler == "step") {
534534
scheduler <- torch::lr_step(optimizer, config$step_size, config$lr_decay)
535535
} else {
536-
stop("Currently only the 'step' and 'reduce_on_plateau' scheduler are supported.", call. = FALSE)
536+
not_implemented_error("Currently only the {.str step} and {.str reduce_on_plateau} scheduler are supported.")
537537
}
538538

539539
# restore previous metrics & checkpoints
@@ -598,7 +598,7 @@ tabnet_train_supervised <- function(obj, x, y, config = tabnet_config(), epoch_s
598598
patience_counter <- patience_counter + 1
599599
if (patience_counter >= config$early_stopping_patience){
600600
if (config$verbose)
601-
message(gettextf("Early stopping at epoch %03d", epoch))
601+
cli::cli_alert_success("Early-stopping at epoch {.val {epoch}}")
602602
break
603603
}
604604
} else {
@@ -623,10 +623,9 @@ tabnet_train_supervised <- function(obj, x, y, config = tabnet_config(), epoch_s
623623
if(!config$skip_importance) {
624624
importance_sample_size <- config$importance_sample_size
625625
if (is.null(config$importance_sample_size) && train_ds$.length() > 1e5) {
626-
warning(
627-
gettextf(
628-
"Computing importances for a dataset with size %s. This can consume too much memory. We are going to use a sample of size 1e5, You can disable this message by using the `importance_sample_size` argument.",
629-
train_ds$.length()))
626+
warn("Computing importances for a dataset with size {.val {train_ds$.length()}}.
627+
This can consume too much memory. We are going to use a sample of size 1e5.
628+
You can disable this message by using the {.var importance_sample_size} argument.")
630629
importance_sample_size <- 1e5
631630
}
632631
indexes <- as.numeric(torch::torch_randint(
@@ -643,6 +642,7 @@ tabnet_train_supervised <- function(obj, x, y, config = tabnet_config(), epoch_s
643642
} else {
644643
importances <- NULL
645644
}
645+
646646
list(
647647
network = network,
648648
metrics = metrics,

R/parsnip.R

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -471,8 +471,8 @@ tabnet <- function(mode = "unknown", cat_emb_dim = NULL, decision_width = NULL,
471471
) {
472472

473473
if (!requireNamespace("parsnip", quietly = TRUE))
474-
stop("Package \"parsnip\" needed for this function to work. Please install it.", call. = FALSE)
475-
474+
runtime_error("Package {.pkg parsnip} is needed for this function to work. Please install it.")
475+
476476
if (parsnip_is_missing_tabnet(tabnet_env)) {
477477
add_parsnip_tabnet()
478478
tabnet_env$parsnip_added <- TRUE

R/pretraining.R

Lines changed: 28 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -114,12 +114,13 @@ tabnet_train_unsupervised <- function(x, config = tabnet_config(), epoch_shift =
114114

115115
network$to(device = device)
116116

117-
# instanciate optimizer
117+
# instantiate optimizer
118118
if (is_optim_generator(config$optimizer)) {
119119
optimizer <- config$optimizer(network$parameters, config$learn_rate)
120-
} else
121-
stop("`optimizer` must be resolved into a torch optimizer generator.", call. = FALSE)
122-
120+
} else {
121+
type_error("{.var optimizer} must be resolved into a torch optimizer generator.")
122+
}
123+
123124

124125
# define scheduler
125126
if (is.null(config$lr_scheduler)) {
@@ -131,7 +132,7 @@ tabnet_train_unsupervised <- function(x, config = tabnet_config(), epoch_shift =
131132
} else if (config$lr_scheduler == "step") {
132133
scheduler <- torch::lr_step(optimizer, config$step_size, config$lr_decay)
133134
} else {
134-
stop("Currently only the 'step' and 'reduce_on_plateau' scheduler are supported.", call. = FALSE)
135+
not_implemented_error("Currently only the {.str step} and {.str reduce_on_plateau} scheduler are supported.")
135136
}
136137

137138
# initialize metrics & checkpoints
@@ -195,7 +196,7 @@ tabnet_train_unsupervised <- function(x, config = tabnet_config(), epoch_shift =
195196
patience_counter <- patience_counter + 1
196197
if (patience_counter >= config$early_stopping_patience) {
197198
if (config$verbose)
198-
rlang::inform(sprintf("Early stopping at epoch %03d", epoch))
199+
cli::cli_alert_success("Early-stopping at epoch {.val {epoch}}")
199200
break
200201
}
201202
} else {
@@ -217,26 +218,28 @@ tabnet_train_unsupervised <- function(x, config = tabnet_config(), epoch_shift =
217218
}
218219

219220
network$to(device = "cpu")
220-
221-
importance_sample_size <- config$importance_sample_size
222-
if (is.null(config$importance_sample_size) && train_ds$.length() > 1e5) {
223-
warning(domain=NA,
224-
gettextf("Computing importances for a dataset with size %s. This can consume too much memory. We are going to use a sample of size 1e5. You can disable this message by using the `importance_sample_size` argument.", train_ds$.length()),
225-
call. = FALSE)
226-
importance_sample_size <- 1e5
227-
}
228-
indexes <- as.numeric(torch::torch_randint(
229-
1, train_ds$.length(), min(importance_sample_size, train_ds$.length()),
230-
dtype = torch::torch_long()
231-
))
232-
importances <- tibble::tibble(
233-
variables = colnames(x),
234-
importance = compute_feature_importance(
235-
network,
236-
train_ds$.getbatch(batch =indexes)$x$to(device = "cpu"),
237-
train_ds$.getbatch(batch =indexes)$x_na_mask$to(device = "cpu")
221+
if(!config$skip_importance) {
222+
importance_sample_size <- config$importance_sample_size
223+
if (is.null(config$importance_sample_size) && train_ds$.length() > 1e5) {
224+
warn("Computing importances for a dataset with size {.val {train_ds$.length()}}.
225+
This can consume too much memory. We are going to use a sample of size 1e5.
226+
You can disable this message by using the {.var importance_sample_size} argument.")
227+
importance_sample_size <- 1e5
228+
}
229+
indexes <- as.numeric(torch::torch_randint(
230+
1, train_ds$.length(), min(importance_sample_size, train_ds$.length()),
231+
dtype = torch::torch_long()
232+
))
233+
importances <- tibble::tibble(
234+
variables = colnames(x),
235+
importance = compute_feature_importance(
236+
network,
237+
train_ds$.getbatch(batch =indexes)$x$to(device = "cpu"),
238+
train_ds$.getbatch(batch =indexes)$x_na_mask$to(device = "cpu"))
238239
)
239-
)
240+
} else {
241+
importances <- NULL
242+
}
240243

241244
list(
242245
network = network,

R/tab-network.R

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -235,9 +235,9 @@ tabnet_pretrainer <- torch::nn_module(
235235
self$initial_bn <- torch::nn_batch_norm1d(input_dim, momentum = momentum)
236236

237237
if (n_steps <= 0)
238-
stop("'n_steps' should be a positive integer.")
238+
value_error("{.var n_steps} should be a positive integer.")
239239
if (n_independent == 0 && n_shared == 0)
240-
stop("'n_shared' and 'n_independant' can't be both zero.")
240+
value_error("{.var n_shared} and {.var n_independent} can't be both zero.")
241241

242242
# self$virtual_batch_size <- virtual_batch_size
243243
self$embedder <- embedding_generator(input_dim, cat_dims, cat_idxs, cat_emb_dim)
@@ -402,10 +402,10 @@ tabnet_nn <- torch::nn_module(
402402
self$cat_emb_dim <- cat_emb_dim
403403

404404
if (n_steps <= 0)
405-
stop("'n_steps' should be a positive integer.")
405+
value_error("{.var n_steps} should be a positive integer.")
406406
if (n_independent == 0 && n_shared == 0)
407-
stop("'n_shared' and 'n_independant' can't be both zero.")
408-
407+
value_error("{.var n_shared} and {.var n_independent} can't be both zero.")
408+
409409
self$virtual_batch_size <- virtual_batch_size
410410
self$embedder <- embedding_generator(input_dim, cat_dims, cat_idxs, cat_emb_dim)
411411
self$embedder_na <- na_embedding_generator(input_dim, cat_dims, cat_idxs, cat_emb_dim)
@@ -460,8 +460,7 @@ attentive_transformer <- torch::nn_module(
460460
else if (mask_type == "sparsemax")
461461
self$selector <- sparsemax(dim = -1L)
462462
else
463-
stop("Please choose either 'sparsemax', 'sparsemax15', 'entmax' or 'entmax15' as 'mask_type'")
464-
463+
value_error("Please choose either {.val sparsemax}, {.val sparsemax15}, {.val entmax} or {.val entmax15} as {.var mask_type}")
465464
},
466465
forward = function(priors, processed_feat) {
467466
x <- self$fc(processed_feat)
@@ -625,8 +624,9 @@ embedding_generator <- torch::nn_module(
625624

626625
# check that all embeddings dimensions are provided
627626
if (length(self$cat_emb_dims) != length(cat_dims)){
628-
msg = paste0("`cat_emb_dim` length must be 1 or the number of categorical predictors, got length ",length(self$cat_emb_dims)," for ",length(cat_dims)," categorical predictors")
629-
stop(msg)
627+
value_error("{.var cat_emb_dim} length must be 1 or the number of categorical predictors,
628+
got length {.val {length(self$cat_emb_dims)}} for {.val {length(cat_dims)}}
629+
categorical predictors")
630630
}
631631

632632
self$post_embed_dim <- as.integer(input_dim + sum(self$cat_emb_dims) - length(self$cat_emb_dims))

0 commit comments

Comments
 (0)