mlr-org
diff --git a/‎DESCRIPTION‎
Lines changed: 1 addition & 0 deletions b/‎DESCRIPTION‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎NEWS.md‎
Lines changed: 2 additions & 0 deletions b/‎NEWS.md‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎R/LearnerClassifCVGlmnet.R‎
Lines changed: 1 addition & 3 deletions b/‎R/LearnerClassifCVGlmnet.R‎
Lines changed: 1 addition & 3 deletions
diff --git a/‎R/LearnerClassifGlmnet.R‎
Lines changed: 1 addition & 3 deletions b/‎R/LearnerClassifGlmnet.R‎
Lines changed: 1 addition & 3 deletions
diff --git a/‎R/LearnerClassifXgboost.R‎
Lines changed: 30 additions & 104 deletions b/‎R/LearnerClassifXgboost.R‎
Lines changed: 30 additions & 104 deletions
diff --git a/‎R/LearnerRegrCVGlmnet.R‎
Lines changed: 2 additions & 2 deletions b/‎R/LearnerRegrCVGlmnet.R‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎R/LearnerRegrGlmnet.R‎
Lines changed: 2 additions & 2 deletions b/‎R/LearnerRegrGlmnet.R‎
Lines changed: 2 additions & 2 deletions
@@ -84,4 +84,5 @@ Collate:
     'helpers.R'
     'helpers_glmnet.R'
     'helpers_ranger.R'
+    'helpers_xgboost.R'
     'zzz.R'
@@ -1,5 +1,7 @@
 # mlr3learners (development version)
 
+* fix: `xgboost` learners now use the offset during prediction
+
 # mlr3learners 0.14.0
 
 * compatibility: xgboost 3.1.2.1
 
@@ -58,7 +58,7 @@ LearnerClassifCVGlmnet = R6Class("LearnerClassifCVGlmnet",
         mxit                 = p_int(1L, default = 100L, tags = "train"),
         nfolds               = p_int(3L, default = 10L, tags = "train"),
         nlambda              = p_int(1L, default = 100L, tags = "train"),
-        use_pred_offset      = p_lgl(default = TRUE, tags = "predict"),
+        use_pred_offset      = p_lgl(init = TRUE, tags = "predict"),
         parallel             = p_lgl(default = FALSE, tags = "train"),
         penalty.factor       = p_uty(tags = "train"),
         pmax                 = p_int(0L, tags = "train"),
@@ -78,8 +78,6 @@ LearnerClassifCVGlmnet = R6Class("LearnerClassifCVGlmnet",
         upper.limits         = p_uty(tags = "train")
       )
 
-      ps$set_values(use_pred_offset = TRUE)
-
       super$initialize(
         id = "classif.cv_glmnet",
         param_set = ps,
 
@@ -65,7 +65,7 @@ LearnerClassifGlmnet = R6Class("LearnerClassifGlmnet",
         mxit                 = p_int(1L, default = 100L, tags = "train"),
         mxitnr               = p_int(1L, default = 25L, tags = "train"),
         nlambda              = p_int(1L, default = 100L, tags = "train"),
-        use_pred_offset      = p_lgl(default = TRUE, tags = "predict"),
+        use_pred_offset      = p_lgl(init = TRUE, tags = "predict"),
         penalty.factor       = p_uty(tags = "train"),
         pmax                 = p_int(0L, tags = "train"),
         pmin                 = p_dbl(0, 1, default = 1.0e-9, tags = "train"),
@@ -82,8 +82,6 @@ LearnerClassifGlmnet = R6Class("LearnerClassifGlmnet",
         upper.limits         = p_uty(tags = "train")
       )
 
-      ps$set_values(use_pred_offset = TRUE)
-
       super$initialize(
         id = "classif.glmnet",
         param_set = ps,
 
@@ -98,55 +98,55 @@ LearnerClassifXgboost = R6Class("LearnerClassifXgboost",
       ps = ps(
         alpha                       = p_dbl(0, default = 0, tags = "train"),
         approxcontrib               = p_lgl(default = FALSE, tags = "predict"),
-        base_score                  = p_dbl(default = 0.5, tags = "train"),
+        base_score                  = p_dbl(tags = "train"),
         booster                     = p_fct(c("gbtree", "gblinear", "dart"), default = "gbtree", tags = "train"),
         callbacks                   = p_uty(default = list(), tags = "train"),
-        colsample_bylevel           = p_dbl(0, 1, default = 1, tags = "train"),
-        colsample_bynode            = p_dbl(0, 1, default = 1, tags = "train"),
-        colsample_bytree            = p_dbl(0, 1, default = 1, tags = "train"),
+        colsample_bylevel           = p_dbl(0, 1, default = 1, tags = "train", depends = quote(booster == "gbtree")),
+        colsample_bynode            = p_dbl(0, 1, default = 1, tags = "train", depends = quote(booster == "gbtree")),
+        colsample_bytree            = p_dbl(0, 1, default = 1, tags = "train", depends = quote(booster == "gbtree")),
         device                      = p_uty(default = "cpu", tags = "train"),
         disable_default_eval_metric = p_lgl(default = FALSE, tags = "train"),
         early_stopping_rounds       = p_int(1L, default = NULL, special_vals = list(NULL), tags = "train"),
         eta                         = p_dbl(0, 1, default = 0.3, tags = "train"),
         evals                       = p_uty(default = NULL, tags = "train"),
         eval_metric                 = p_uty(tags = "train"),
         custom_metric               = p_uty(tags = "train", custom_check = crate({function(x) check_true(any(is.function(x), test_multi_class(x, c("MeasureClassifSimple", "MeasureBinarySimple"))))})),
-        extmem_single_page          = p_lgl(default = FALSE, tags = "train"),
+        extmem_single_page          = p_lgl(default = FALSE, tags = "train", depends = quote(tree_method %in% c("hist", "approx"))),
         feature_selector            = p_fct(c("cyclic", "shuffle", "random", "greedy", "thrifty"), default = "cyclic", tags = "train", depends = quote(booster == "gblinear")),
         gamma                       = p_dbl(0, default = 0, tags = "train"),
-        grow_policy                 = p_fct(c("depthwise", "lossguide"), default = "depthwise", tags = "train", depends = quote(tree_method == "hist")),
-        interaction_constraints     = p_uty(tags = "train"),
+        grow_policy                 = p_fct(c("depthwise", "lossguide"), default = "depthwise", tags = "train", depends = quote(booster == "gbtree" && tree_method %in% c("hist", "approx"))),
+        interaction_constraints     = p_uty(tags = "train", depends = quote(booster == "gbtree")),
         iterationrange              = p_uty(tags = "predict"),
         lambda                      = p_dbl(0, default = 1, tags = "train"),
-        max_bin                     = p_int(2L, default = 256L, tags = "train", depends = quote(tree_method == "hist")),
-        max_cached_hist_node        = p_int(default = 65536L, tags = "train", depends = quote(tree_method == "hist")),
-        max_cat_to_onehot           = p_int(tags = "train"),
-        max_cat_threshold           = p_dbl(tags = "train"),
-        max_delta_step              = p_dbl(0, default = 0, tags = "train"),
-        max_depth                   = p_int(0L, default = 6L, tags = "train"),
-        max_leaves                  = p_int(0L, default = 0L, tags = "train", depends = quote(grow_policy == "lossguide")),
+        max_bin                     = p_int(2L, default = 256L, tags = "train", depends = quote(tree_method %in% c("hist", "approx"))),
+        max_cached_hist_node        = p_int(default = 65536L, tags = "train", depends = quote(tree_method %in% c("hist", "approx"))),
+        max_cat_to_onehot           = p_int(tags = "train", depends = quote(tree_method %in% c("hist", "approx"))),
+        max_cat_threshold           = p_dbl(tags = "train", depends = quote(tree_method %in% c("hist", "approx"))),
+        max_delta_step              = p_dbl(0, default = 0, tags = "train", depends = quote(booster == "gbtree")),
+        max_depth                   = p_int(0L, default = 6L, tags = "train", depends = quote(booster == "gbtree")),
+        max_leaves                  = p_int(0L, default = 0L, tags = "train", depends = quote(booster == "gbtree")),
         maximize                    = p_lgl(default = NULL, special_vals = list(NULL), tags = "train"),
-        min_child_weight            = p_dbl(0, default = 1, tags = "train"),
+        min_child_weight            = p_dbl(0, default = 1, tags = "train", depends = quote(booster == "gbtree")),
         missing                     = p_dbl(default = NA, tags = "predict", special_vals = list(NA, NA_real_, NULL)),
-        monotone_constraints        = p_uty(default = 0, tags = "train", custom_check = crate(function(x) { checkmate::check_integerish(x, lower = -1, upper = 1, any.missing = FALSE) })), # nolint
+        monotone_constraints        = p_uty(default = 0, tags = "train", custom_check = crate(function(x) { checkmate::check_integerish(x, lower = -1, upper = 1, any.missing = FALSE) }), depends = quote(booster == "gbtree")), # nolint
         nrounds                     = p_nrounds,
         normalize_type              = p_fct(c("tree", "forest"), default = "tree", tags = "train", depends = quote(booster == "dart")),
         nthread                     = p_int(1L, init = 1L, tags = c("train", "threads")),
-        num_parallel_tree           = p_int(1L, default = 1L, tags = "train"),
+        num_parallel_tree           = p_int(1L, default = 1L, tags = "train", depends = quote(booster == "gbtree")),
         objective                   = p_uty(default = "binary:logistic", tags = c("train", "predict")),
         one_drop                    = p_lgl(default = FALSE, tags = "train", depends = quote(booster == "dart")),
         print_every_n               = p_int(1L, default = 1L, tags = "train", depends = quote(verbose == 1L)),
         rate_drop                   = p_dbl(0, 1, default = 0, tags = "train", depends = quote(booster == "dart")),
-        refresh_leaf                = p_lgl(default = TRUE, tags = "train"),
+        refresh_leaf                = p_lgl(default = TRUE, tags = "train", depends = quote(booster == "gbtree")),
         seed                        = p_int(tags = "train"),
         seed_per_iteration          = p_lgl(default = FALSE, tags = "train"),
         sampling_method             = p_fct(c("uniform", "gradient_based"), default = "uniform", tags = "train", depends = quote(booster == "gbtree")),
         sample_type                 = p_fct(c("uniform", "weighted"), default = "uniform", tags = "train", depends = quote(booster == "dart")),
         save_name                   = p_uty(default = NULL, tags = "train"),
         save_period                 = p_int(0, default = NULL, special_vals = list(NULL), tags = "train"),
-        scale_pos_weight            = p_dbl(default = 1, tags = "train"),
+        scale_pos_weight            = p_dbl(default = 1, tags = "train", depends = quote(booster == "gbtree")),
         skip_drop                   = p_dbl(0, 1, default = 0, tags = "train", depends = quote(booster == "dart")),
-        subsample                   = p_dbl(0, 1, default = 1, tags = "train"),
+        subsample                   = p_dbl(0, 1, default = 1, tags = "train", depends = quote(booster == "gbtree")),
         top_k                       = p_int(0, default = 0, tags = "train", depends = quote(feature_selector %in% c("greedy", "thrifty") && booster == "gblinear")),
         training                    = p_lgl(default = FALSE, tags = "predict"),
         tree_method                 = p_fct(c("auto", "exact", "approx", "hist", "gpu_hist"), default = "auto", tags = "train", depends = quote(booster %in% c("gbtree", "dart"))),
@@ -156,7 +156,8 @@ LearnerClassifXgboost = R6Class("LearnerClassifXgboost",
         validate_features           = p_lgl(default = TRUE, tags = "predict"),
         verbose                     = p_int(0L, 2L, init = 0L, tags = "train"),
         verbosity                   = p_int(0L, 2L, init = 0L, tags = "train"),
-        xgb_model                   = p_uty(default = NULL, tags = "train")
+        xgb_model                   = p_uty(default = NULL, tags = "train"),
+        use_pred_offset             = p_lgl(init = TRUE, tags = "predict")
       )
 
       super$initialize(
@@ -190,7 +191,7 @@ LearnerClassifXgboost = R6Class("LearnerClassifXgboost",
     #' @field internal_valid_scores (named `list()` or `NULL`)
     #' The validation scores extracted from `model$evaluation_log`.
     #' If early stopping is activated, this contains the validation scores of the model for the optimal `nrounds`,
-    #' otherwise the `nrounds` for the final model.
+    #' otherwise the scores are taken from the final boosting round `nrounds`.
     internal_valid_scores = function() {
       self$state$internal_valid_scores
     },
@@ -261,22 +262,8 @@ LearnerClassifXgboost = R6Class("LearnerClassifXgboost",
         xgboost::setinfo(xgb_data, "weight", weights)
       }
 
-      if ("offset" %in% task$properties) {
-        offset = task$offset
-        if (nlvls == 2L) {
-          # binary case
-          base_margin = offset$offset
-        } else {
-          # multiclass needs a matrix (n_samples, n_classes)
-          # it seems reasonable to reorder according to label (0,1,2,...)
-          reordered_cols = paste0("offset_", rev(levels(task$truth())))
-          n_offsets = ncol(offset) - 1 # all expect `row_id`
-          if (length(reordered_cols) != n_offsets) {
-            stopf("Task has %i class labels, and only %i offset columns are provided",
-                 nlevels(task$truth()), n_offsets)
-          }
-          base_margin = as_numeric_matrix(offset)[, reordered_cols]
-        }
+      base_margin = xgboost_get_base_margin(task, "train", pv)
+      if (!is.null(base_margin)) {
         xgboost::setinfo(xgb_data, "base_margin", base_margin)
       }
 
@@ -292,22 +279,13 @@ LearnerClassifXgboost = R6Class("LearnerClassifXgboost",
         xgb_valid_data = xgboost::xgb.DMatrix(data = as_numeric_matrix(valid_data), label = valid_label)
 
         weights = get_weights(internal_valid_task, private)
-
         if (!is.null(weights)) {
           xgboost::setinfo(xgb_valid_data, "weight", weights)
         }
 
-        if ("offset" %in% internal_valid_task$properties) {
-          valid_offset = internal_valid_task$offset
-          if (nlvls == 2L) {
-            base_margin = valid_offset$offset
-          } else {
-            # multiclass needs a matrix (n_samples, n_classes)
-            # it seems reasonable to reorder according to label (0,1,2,...)
-            reordered_cols = paste0("offset_", rev(levels(internal_valid_task$truth())))
-            base_margin = as_numeric_matrix(valid_offset)[, reordered_cols]
-          }
-          xgboost::setinfo(xgb_valid_data, "base_margin", base_margin)
+        valid_base_margin = xgboost_get_base_margin(internal_valid_task, "train", pv)
+        if (!is.null(valid_base_margin)) {
+          xgboost::setinfo(xgb_valid_data, "base_margin", valid_base_margin)
         }
 
         pv$evals = c(pv$evals, list(test = xgb_valid_data))
@@ -371,6 +349,8 @@ LearnerClassifXgboost = R6Class("LearnerClassifXgboost",
         pv$objective = ifelse(nlvls == 2L, "binary:logistic", "multi:softprob")
       }
 
+      pv$base_margin = xgboost_get_base_margin(task, "predict", pv)
+
       newdata = as_numeric_matrix(ordered_features(task, self))
       pred = invoke(predict, model, newdata = newdata, .args = pv)
       if (nlvls == 2L) { # binaryclass
@@ -465,7 +445,6 @@ LearnerClassifXgboost = R6Class("LearnerClassifXgboost",
   )
 )
 
-
 #' @export
 default_values.LearnerClassifXgboost = function(x, search_space, task, ...) { # nolint
   special_defaults = list(
@@ -477,56 +456,3 @@ default_values.LearnerClassifXgboost = function(x, search_space, task, ...) { #
 
 #' @include aaa.R
 learners[["classif.xgboost"]] = LearnerClassifXgboost
-
-# mlr3 measure to custom inner measure functions
-xgboost_binary_binary_prob = function(pred, dtrain, measure, ...) {
-  # label is a vector of labels (0, 1)
-  truth = factor(xgboost::getinfo(dtrain, "label"), levels = c(0, 1))
-  # pred is a vector of log odds
-  # transform log odds to probabilities
-  pred = 1 / (1 + exp(-pred))
-  measure$fun(truth, pred, positive = "1")
-}
-
-xgboost_binary_classif_prob = function(pred, dtrain, measure, ...) {
-  # label is a vector of labels (0, 1)
-  truth = factor(xgboost::getinfo(dtrain, "label"), levels = c(0, 1))
-  # pred is a vector of log odds
-  # transform log odds to probabilities
-  pred = 1 / (1 + exp(-pred))
-  # multiclass measure needs a matrix of probabilities
-  pred_mat = matrix(c(pred, 1 - pred), ncol = 2)
-  colnames(pred_mat) = c("1", "0")
-  measure$fun(truth, pred_mat, positive = "1")
-}
-
-xgboost_binary_response = function(pred, dtrain, measure, ...) {
-  # label is a vector of labels (0, 1)
-  truth = factor(xgboost::getinfo(dtrain, "label"), levels = c(0, 1))
-  # pred is a vector of log odds
-  response = factor(as.integer(pred > 0), levels = c(0, 1))
-  measure$fun(truth, response)
-}
-
-xgboost_multiclass_prob = function(pred, dtrain, measure, n_classes, ...) {
-  # label is a vector of labels (0, 1, ..., n_classes - 1)
-  truth = factor(xgboost::getinfo(dtrain, "label"), levels = seq_len(n_classes) - 1L)
-
-  # pred is a matrix of log odds for each class
-  # transform log odds to probabilities
-  pred_exp = exp(pred)
-  pred_mat = pred_exp / rowSums(pred_exp)
-  colnames(pred_mat) = levels(truth)
-
-  measure$fun(truth, pred_mat)
-}
-
-xgboost_multiclass_response = function(pred, dtrain, measure, n_classes, ...) {
-  # label is a vector of labels (0, 1, ..., n_classes - 1)
-  truth = factor(xgboost::getinfo(dtrain, "label"), levels = seq_len(n_classes) - 1L)
-
-  # pred is a matrix of log odds for each class
-  response = factor(max.col(pred, ties.method = "random") - 1, levels = levels(truth))
-  measure$fun(truth, response)
-}
-
@@ -53,7 +53,7 @@ LearnerRegrCVGlmnet = R6Class("LearnerRegrCVGlmnet",
         mxitnr               = p_int(1L, default = 25L, tags = "train"),
         nfolds               = p_int(3L, default = 10L, tags = "train"),
         nlambda              = p_int(1L, default = 100L, tags = "train"),
-        use_pred_offset      = p_lgl(default = TRUE, tags = "predict"),
+        use_pred_offset      = p_lgl(init = TRUE, tags = "predict"),
         parallel             = p_lgl(default = FALSE, tags = "train"),
         penalty.factor       = p_uty(tags = "train"),
         pmax                 = p_int(0L, tags = "train"),
@@ -73,7 +73,7 @@ LearnerRegrCVGlmnet = R6Class("LearnerRegrCVGlmnet",
         upper.limits         = p_uty(tags = "train")
       )
 
-      ps$set_values(family = "gaussian", use_pred_offset = TRUE)
+      ps$set_values(family = "gaussian")
 
       super$initialize(
         id = "regr.cv_glmnet",
 
@@ -52,7 +52,7 @@ LearnerRegrGlmnet = R6Class("LearnerRegrGlmnet",
         mnlam                 = p_int(1L, default = 5L, tags = "train"),
         mxit                  = p_int(1L, default = 100L, tags = "train"),
         mxitnr                = p_int(1L, default = 25L, tags = "train"),
-        use_pred_offset       = p_lgl(default = TRUE, tags = "predict"),
+        use_pred_offset       = p_lgl(init = TRUE, tags = "predict"),
         nlambda               = p_int(1L, default = 100L, tags = "train"),
         parallel              = p_lgl(default = FALSE, tags = "train"),
         penalty.factor        = p_uty(tags = "train"),
@@ -71,7 +71,7 @@ LearnerRegrGlmnet = R6Class("LearnerRegrGlmnet",
         upper.limits          = p_uty(tags = "train")
       )
 
-      ps$set_values(family = "gaussian", use_pred_offset = TRUE)
+      ps$set_values(family = "gaussian")
 
       super$initialize(
         id = "regr.glmnet",