mlr-org · AnnaNzrv · May 21, 2025 · May 21, 2025 · May 21, 2025 · May 21, 2025
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -22,7 +22,8 @@ Authors@R: c(
            comment = c(ORCID = "0000-0002-3609-8674")),
     person("Lukas", "Burk", , "[email protected]", role = "ctb",
            comment = c(ORCID = "0000-0001-7528-3795")),
-    person("Lona", "Koers", , "[email protected]", role = "ctb")
+    person("Lona", "Koers", , "[email protected]", role = "ctb"),
+    person("Anna", "Nazarova", , "[email protected]", role = "ctb")
   )
 Description: Extra learners for use in mlr3.
 License: LGPL-3
@@ -100,6 +101,7 @@ Suggests:
     sandwich,
     set6,
     sm,
+    sparsediscrim,
     stats,
     survival,
     survivalmodels (>= 0.1.19),

diff --git a/NAMESPACE b/NAMESPACE
@@ -10,6 +10,7 @@ export(LearnerClassifCTree)
 export(LearnerClassifCatboost)
 export(LearnerClassifDecisionStump)
 export(LearnerClassifDecisionTable)
+export(LearnerClassifDiagLda)
 export(LearnerClassifEarth)
 export(LearnerClassifFNN)
 export(LearnerClassifGAMBoost)
@@ -29,6 +30,7 @@ export(LearnerClassifLSSVM)
 export(LearnerClassifLiblineaR)
 export(LearnerClassifLightGBM)
 export(LearnerClassifLogistic)
+export(LearnerClassifMdeb)
 export(LearnerClassifMob)
 export(LearnerClassifMultilayerPerceptron)
 export(LearnerClassifNaiveBayesMultinomial)
@@ -45,6 +47,7 @@ export(LearnerClassifRandomPlantedForest)
 export(LearnerClassifRandomTree)
 export(LearnerClassifSGD)
 export(LearnerClassifSMO)
+export(LearnerClassifSdlda)
 export(LearnerClassifSimpleLogistic)
 export(LearnerClassifVotedPerceptron)
 export(LearnerDensKDEks)

diff --git a/R/bibentries.R b/R/bibentries.R
@@ -712,6 +712,40 @@ bibentries = c( # nolint start
     title = "Large margin classification using the perceptron algorithm",
     year = "1998"
   ),
+  Dudoit2002 = bibentry("article",
+    title = "Comparison of Discrimination Methods for the Classification of Tumors Using Gene Expression Data",
+    author = "Sandrine Dudoit and Jane Fridlyand and Terence P Speed",
+    year = "2002",
+    journal = "Journal of the American Statistical Association",
+    volume = "97",
+    number = "457",
+    pages = "77--87",
+    publisher = "ASA Websites",
+    doi = "10.1198/016214502753479248"
+  ),
+  Srivastava2007mdeb = bibentry("article",
+    title = "Comparison of Discrimination Methods for High Dimensional Data",
+    author = "Srivastava, M. and Kubokawa, T.",
+    year = "2007",
+    journal = "Journal of the Japanese Statistical Association",
+    volume = "37",
+    number = "1",
+    pages = "123--134"
+  ),
+  Peng2009sdlda = bibentry("article",
+    ISSN = "0006341X, 15410420",
+    URL = "http://www.jstor.org/stable/20640622",
+    abstract = "High-dimensional data such as microarrays have brought us new statistical challenges. For example, using a large number of genes to classify samples based on a small number of microarrays remains a difficult problem. Diagonal discriminant analysis, support vector machines, and k-nearest neighbor have been suggested as among the best methods for small sample size situations, but none was found to be superior to others. In this article, we propose an improved diagonal discriminant approach through shrinkage and regularization of the variances. The performance of our new approach along with the existing methods is studied through simulations and applications to real data. These studies show that the proposed shrinkage-based and regularization diagonal discriminant methods have lower misclassification rates than existing methods in many cases.",
+    author = "Herbert Pang and Tiejun Tong and Hongyu Zhao",
+    journal = "Biometrics",
+    number = "4",
+    pages = "1021--1029",
+    publisher = "[Wiley, International Biometric Society]",
+    title = "Shrinkage-Based Diagonal Discriminant Analysis and Its Applications in High-Dimensional Data",
+    urldate = "2025-05-21",
+    volume = "65",
+    year = "2009"
+  ),
   faisolo2017qgam = bibentry("article",
     title = "Fast Calibrated Additive Quantile Regression",
     author = "Fasiolo, Matteo and Wood, Simon N. and Zaffran, Margaux and Nedellec, Raphael and Goude, Yannig",

diff --git a/R/learner_sparsediscrim_classif_dlda.R b/R/learner_sparsediscrim_classif_dlda.R
@@ -0,0 +1,77 @@
+#' @title Classification Diagonal Linear Discriminant Analysis Learner
+#' @author annanzrv
+#' @name mlr_learners_classif.dlda
+#'
+#' @description
+#' Diagonal Linear Discriminant Analysis classifier.
+#' Belongs to the family of Naive Bayes classifiers, where the distributions of
+#' each class are assumed to be multivariate normal and to share a common
+#' covariance matrix. Off-diagonal elements of the pooled sample covariance matrix
+#' are set to zero.
+#' Calls [sparsediscrim::lda_diag()] from \CRANpkg{sparsediscrim}.
+#'
+#' @templateVar id classif.dlda
+#' @template learner
+#'
+#' @references
+#' `r format_bib("Dudoit2002")`
+#'
+#' @template seealso_learner
+#' @template example
+#' @export
+LearnerClassifDiagLda = R6Class("LearnerClassifDiagLda",
+  inherit = LearnerClassif,
+  public = list(
+    #' @description
+    #' Creates a new instance of this [R6][R6::R6Class] class.
+    initialize = function() {
+      param_set = ps(
+        prior = p_uty(default = NULL, tags = "train")
+      )
+
+      super$initialize(
+        id = "classif.dlda",
+        packages = "sparsediscrim",
+        feature_types = c("integer", "numeric"),
+        predict_types = c("response", "prob"),
+        param_set = param_set,
+        properties = c("multiclass", "twoclass"),
+        man = "mlr3extralearners::mlr_learners_classif.dlda",
+        label = "Diagonal Linear Discriminant Analysis"
+      )
+    }
+  ),
+  private = list(
+    .train = function(task) {
+      # hyperparameters tagged "train" are forwarded to the fitting function
+      pars = self$param_set$get_values(tags = "train")
+
+      formula = task$formula()
+      data = task$data()
+
+      invoke(
+        sparsediscrim::lda_diag,
+        formula = formula,
+        data = data,
+        .args = pars
+      )
+    },
+    .predict = function(task) {
+      pars = self$param_set$get_values(tags = "predict")
+
+      # get newdata and ensure same ordering in train and predict
+      newdata = ordered_features(task, self)
+
+      # predict() is called with type "class" when the learner's type is "response"
+      type = self$predict_type
+      if (type == "response") type = "class"
+
+      pred = invoke(predict, self$model, newdata = newdata, type = type, .args = pars)
+      # probability predictions are coerced to a matrix before being returned
+      if (type == "prob") pred = as.matrix(pred)
+      setNames(list(pred), self$predict_type)
+    }
+  )
+)
+
+.extralrns_dict$add("classif.dlda", LearnerClassifDiagLda)
diff --git a/R/learner_sparsediscrim_classif_mdeb.R b/R/learner_sparsediscrim_classif_mdeb.R
@@ -0,0 +1,74 @@
+#' @title Classification Minimum Distance Empirical Bayesian Learner
+#' @author annanzrv
+#' @name mlr_learners_classif.mdeb
+#'
+#' @description
+#' Minimum Distance Empirical Bayesian classification, designed for small-sample, high-dimensional data.
+#' Empirical Bayes estimator where the eigenvalues of the pooled sample covariance matrix are shrunken towards
+#' the identity matrix.
+#' Calls [sparsediscrim::lda_emp_bayes()] from \CRANpkg{sparsediscrim}.
+#'
+#' @templateVar id classif.mdeb
+#' @template learner
+#'
+#' @references
+#' `r format_bib("Srivastava2007mdeb")`
+#'
+#' @template seealso_learner
+#' @template example
+#' @export
+LearnerClassifMdeb = R6Class("LearnerClassifMdeb",
+  inherit = LearnerClassif,
+  public = list(
+    #' @description
+    #' Creates a new instance of this [R6][R6::R6Class] class.
+    initialize = function() {
+      param_set = ps(
+        prior = p_uty(default = NULL, tags = "train")
+      )
+
+      super$initialize(
+        id = "classif.mdeb",
+        packages = "sparsediscrim",
+        feature_types = c("integer", "numeric"),
+        predict_types = c("response", "prob"),
+        param_set = param_set,
+        properties = c("multiclass", "twoclass"),
+        man = "mlr3extralearners::mlr_learners_classif.mdeb",
+        label = "Minimum Distance Empirical Bayesian Classification"
+      )
+    }
+  ),
+  private = list(
+    .train = function(task) {
+      # hyperparameters tagged "train" are forwarded to the fitting function
+      pars = self$param_set$get_values(tags = "train")
+
+      formula = task$formula()
+      data = task$data()
+
+      invoke(
+        sparsediscrim::lda_emp_bayes,
+        formula = formula,
+        data = data,
+        .args = pars
+      )
+    },
+    .predict = function(task) {
+      pars = self$param_set$get_values(tags = "predict")
+
+      # get newdata and ensure same ordering in train and predict
+      newdata = ordered_features(task, self)
+
+      # predict() is called with type "class" when the learner's type is "response"
+      type = self$predict_type
+      if (type == "response") type = "class"
+
+      pred = invoke(predict, self$model, newdata = newdata, type = type, .args = pars)
+      if (type == "prob") pred = as.matrix(pred)
+      setNames(list(pred), self$predict_type)
+    }
+  )
+)
+
+.extralrns_dict$add("classif.mdeb", LearnerClassifMdeb)
diff --git a/R/learner_sparsediscrim_classif_sdlda.R b/R/learner_sparsediscrim_classif_sdlda.R
@@ -0,0 +1,75 @@
+#' @title Classification Shrinkage-based Diagonal Linear Discriminant Analysis Learner
+#' @author annanzrv
+#' @name mlr_learners_classif.sdlda
+#'
+#' @description
+#' Shrinkage-based Diagonal Linear Discriminant Analysis classifier.
+#' A type of Naive Bayes classifier that improves the estimation of the pooled variances by
+#' using a shrinkage-based estimator of the pooled covariance matrix.
+#' Calls [sparsediscrim::lda_shrink_cov()] from \CRANpkg{sparsediscrim}.
+#'
+#' @templateVar id classif.sdlda
+#' @template learner
+#'
+#' @references
+#' `r format_bib("Peng2009sdlda")`
+#'
+#' @template seealso_learner
+#' @template example
+#' @export
+LearnerClassifSdlda = R6Class("LearnerClassifSdlda",
+  inherit = LearnerClassif,
+  public = list(
+    #' @description
+    #' Creates a new instance of this [R6][R6::R6Class] class.
+    initialize = function() {
+      param_set = ps(
+        num_alphas = p_int(default = 101, tags = "train"),
+        prior      = p_uty(default = NULL, tags = "train")
+      )
+
+      super$initialize(
+        id = "classif.sdlda",
+        packages = "sparsediscrim",
+        feature_types = c("integer", "numeric"),
+        predict_types = c("response", "prob"),
+        param_set = param_set,
+        properties = c("multiclass", "twoclass"),
+        man = "mlr3extralearners::mlr_learners_classif.sdlda",
+        label = "Shrinkage-based Diagonal Linear Discriminant Analysis"
+      )
+    }
+  ),
+  private = list(
+    .train = function(task) {
+      # hyperparameters tagged "train" are forwarded to the fitting function
+      pars = self$param_set$get_values(tags = "train")
+
+      formula = task$formula()
+      data = task$data()
+
+      invoke(
+        sparsediscrim::lda_shrink_cov,
+        formula = formula,
+        data = data,
+        .args = pars
+      )
+    },
+    .predict = function(task) {
+      pars = self$param_set$get_values(tags = "predict")
+
+      # get newdata and ensure same ordering in train and predict
+      newdata = ordered_features(task, self)
+
+      # predict() is called with type "class" when the learner's type is "response"
+      type = self$predict_type
+      if (type == "response") type = "class"
+
+      pred = invoke(predict, self$model, newdata = newdata, type = type, .args = pars)
+      if (type == "prob") pred = as.matrix(pred)
+      setNames(list(pred), self$predict_type)
+    }
+  )
+)
+
+.extralrns_dict$add("classif.sdlda", LearnerClassifSdlda)
diff --git a/man/mlr3extralearners-package.Rd b/man/mlr3extralearners-package.Rd
diff --git a/man/mlr_learners_classif.C50.Rd b/man/mlr_learners_classif.C50.Rd
diff --git a/man/mlr_learners_classif.abess.Rd b/man/mlr_learners_classif.abess.Rd