Skip to content

Adding learner from package sparsediscrim #430

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 6 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,8 @@ Authors@R: c(
comment = c(ORCID = "0000-0002-3609-8674")),
person("Lukas", "Burk", , "[email protected]", role = "ctb",
comment = c(ORCID = "0000-0001-7528-3795")),
person("Lona", "Koers", , "[email protected]", role = "ctb")
person("Lona", "Koers", , "[email protected]", role = "ctb"),
person("Anna", "Nazarova", , "[email protected]", role = "ctb")
)
Description: Extra learners for use in mlr3.
License: LGPL-3
Expand Down Expand Up @@ -100,6 +101,7 @@ Suggests:
sandwich,
set6,
sm,
sparsediscrim,
stats,
survival,
survivalmodels (>= 0.1.19),
Expand Down
3 changes: 3 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ export(LearnerClassifCTree)
export(LearnerClassifCatboost)
export(LearnerClassifDecisionStump)
export(LearnerClassifDecisionTable)
export(LearnerClassifDiagLda)
export(LearnerClassifEarth)
export(LearnerClassifFNN)
export(LearnerClassifGAMBoost)
Expand All @@ -29,6 +30,7 @@ export(LearnerClassifLSSVM)
export(LearnerClassifLiblineaR)
export(LearnerClassifLightGBM)
export(LearnerClassifLogistic)
export(LearnerClassifMdeb)
export(LearnerClassifMob)
export(LearnerClassifMultilayerPerceptron)
export(LearnerClassifNaiveBayesMultinomial)
Expand All @@ -45,6 +47,7 @@ export(LearnerClassifRandomPlantedForest)
export(LearnerClassifRandomTree)
export(LearnerClassifSGD)
export(LearnerClassifSMO)
export(LearnerClassifSdlda)
export(LearnerClassifSimpleLogistic)
export(LearnerClassifVotedPerceptron)
export(LearnerDensKDEks)
Expand Down
34 changes: 34 additions & 0 deletions R/bibentries.R
Original file line number Diff line number Diff line change
Expand Up @@ -712,6 +712,40 @@ bibentries = c( # nolint start
title = "Large margin classification using the perceptron algorithm",
year = "1998"
),
Dudoit2002 = bibentry("article",
title = "Comparison of Discrimination Methods for the Classification of Tumors Using Gene Expression Data",
author = "Sandrine Dudoit and Jane Fridlyand and Terence P Speed",
year = "2002",
journal = "Journal of the American Statistical Association",
volume = "97",
number = "457",
pages = "77--87",
publisher = "ASA Websites",
doi = "10.1198/016214502753479248"
),
Srivastava2007mdeb = bibentry("article",
title = "Comparison of Discrimination Methods for High Dimensional Data",
author = "Srivastava, M. and Kubokawa, T.",
year = "2007",
journal = "Journal of the Japanese Statistical Association",
volume = "37",
number = "1",
pages = "123--134"
),
Peng2009sdlda = bibentry("article",
ISSN = "0006341X, 15410420",
URL = "http://www.jstor.org/stable/20640622",
abstract = "High-dimensional data such as microarrays have brought us new statistical challenges. For example, using a large number of genes to classify samples based on a small number of microarrays remains a difficult problem. Diagonal discriminant analysis, support vector machines, and k-nearest neighbor have been suggested as among the best methods for small sample size situations, but none was found to be superior to others. In this article, we propose an improved diagonal discriminant approach through shrinkage and regularization of the variances. The performance of our new approach along with the existing methods is studied through simulations and applications to real data. These studies show that the proposed shrinkage-based and regularization diagonal discriminant methods have lower misclassification rates than existing methods in many cases.",
author = "Herbert Pang and Tiejun Tong and Hongyu Zhao",
journal = "Biometrics",
number = "4",
pages = "1021--1029",
publisher = "[Wiley, International Biometric Society]",
title = "Shrinkage-Based Diagonal Discriminant Analysis and Its Applications in High-Dimensional Data",
urldate = "2025-05-21",
volume = "65",
year = "2009"
),
faisolo2017qgam = bibentry("article",
title = "Fast Calibrated Additive Quantile Regression",
author = "Fasiolo, Matteo and Wood, Simon N. and Zaffran, Margaux and Nedellec, Raphael and Goude, Yannig",
Expand Down
77 changes: 77 additions & 0 deletions R/learner_sparsediscrim_classif_dlda.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
#' @title Classification Diagonal Linear Discriminant Analysis Learner
#' @author annanzrv
#' @name mlr_learners_classif.dlda
#'
#' @description
#' Diagonal Linear Discriminant Analysis classifier.
#' Belongs to the family of Naive Bayes classifiers, where the distributions of
#' each class are assumed to be multivariate normal and to share a common
#' covariance matrix. Off-diagonal elements of the pooled sample covariance matrix
#' are set to zero.
#' Calls [sparsediscrim::lda_diag()] from \CRANpkg{sparsediscrim}.
#'
#' @templateVar id classif.dlda
#' @template learner
#'
#' @references
#' `r format_bib("Dudoit2002")`
#'
#' @template seealso_learner
#' @template example
#' @export
LearnerClassifDiagLda = R6Class("LearnerClassifDiagLda",
  inherit = LearnerClassif,
  public = list(
    #' @description
    #' Creates a new instance of this [R6][R6::R6Class] class.
    initialize = function() {
      # `prior` is the only hyperparameter; it is tagged "train" and therefore
      # forwarded to `sparsediscrim::lda_diag()` in `.train()`.
      param_set = ps(
        prior = p_uty(default = NULL, tags = "train")
      )

      super$initialize(
        id = "classif.dlda",
        packages = "sparsediscrim",
        feature_types = c("integer", "numeric"),
        predict_types = c("response", "prob"),
        param_set = param_set,
        properties = c("multiclass", "twoclass"),
        man = "mlr3extralearners::mlr_learners_classif.dlda",
        label = "Diagonal Linear Discriminant Analysis"
      )
    }
  ),
  private = list(
    # Fits the model through the formula interface of `lda_diag()` and returns
    # the fitted object.
    .train = function(task) {
      # get parameters for training
      pars = self$param_set$get_values(tags = "train")

      formula = task$formula()
      data = task$data()

      invoke(
        sparsediscrim::lda_diag,
        formula = formula,
        data = data,
        .args = pars
      )
    },

    # Predicts on `task` and returns a named list with a single element named
    # after the active predict type ("response" or "prob").
    .predict = function(task) {
      # No parameter is currently tagged "predict"; kept so that predict-time
      # parameters added later are forwarded automatically.
      pars = self$param_set$get_values(tags = "predict")

      # get newdata and ensure same ordering in train and predict
      newdata = ordered_features(task, self)

      # The predict method is called with type = "class" when mlr3 asks for
      # "response"; "prob" is passed through unchanged.
      type = self$predict_type
      if (type == "response") type = "class"

      pred = invoke(predict, self$model, newdata = newdata, type = type, .args = pars)
      # Probabilities are handed back to mlr3 as a matrix.
      if (type == "prob") pred = as.matrix(pred)
      setNames(list(pred), self$predict_type)
    }
  )
)

.extralrns_dict$add("classif.dlda", LearnerClassifDiagLda)
74 changes: 74 additions & 0 deletions R/learner_sparsediscrim_classif_mdeb.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
#' @title Classification Minimum Distance Empirical Bayes Learner
#' @author annanzrv
#' @name mlr_learners_classif.mdeb
#'
#' @description
#' Minimum Distance Empirical Bayesian classification, designed for small-sample, high-dimensional data.
#' Empirical Bayes estimator where the eigenvalues of the pooled sample covariance matrix are shrunken towards
#' the identity matrix.
#' Calls [sparsediscrim::lda_emp_bayes()] from \CRANpkg{sparsediscrim}.
#'
#' @templateVar id classif.mdeb
#' @template learner
#'
#' @references
#' `r format_bib("Srivastava2007mdeb")`
#'
#' @template seealso_learner
#' @template example
#' @export
LearnerClassifMdeb = R6Class("LearnerClassifMdeb",
  inherit = LearnerClassif,
  public = list(
    #' @description
    #' Creates a new instance of this [R6][R6::R6Class] class.
    initialize = function() {
      # `prior` is the only hyperparameter; it is tagged "train" and therefore
      # forwarded to `sparsediscrim::lda_emp_bayes()` in `.train()`.
      param_set = ps(
        prior = p_uty(default = NULL, tags = "train")
      )

      super$initialize(
        # Must match the key used for the dictionary registration below and
        # the `man` page; it was previously "classif.dlda" by copy-paste.
        id = "classif.mdeb",
        packages = "sparsediscrim",
        feature_types = c("integer", "numeric"),
        predict_types = c("response", "prob"),
        param_set = param_set,
        properties = c("multiclass", "twoclass"),
        man = "mlr3extralearners::mlr_learners_classif.mdeb",
        label = "Minimum Distance Empirical Bayesian Classification"
      )
    }
  ),
  private = list(
    # Fits the model through the formula interface of `lda_emp_bayes()` and
    # returns the fitted object.
    .train = function(task) {
      # get parameters for training
      pars = self$param_set$get_values(tags = "train")

      formula = task$formula()
      data = task$data()

      invoke(
        sparsediscrim::lda_emp_bayes,
        formula = formula,
        data = data,
        .args = pars
      )
    },

    # Predicts on `task` and returns a named list with a single element named
    # after the active predict type ("response" or "prob").
    .predict = function(task) {
      # No parameter is currently tagged "predict"; kept so that predict-time
      # parameters added later are forwarded automatically.
      pars = self$param_set$get_values(tags = "predict")

      # get newdata and ensure same ordering in train and predict
      newdata = ordered_features(task, self)

      # The predict method is called with type = "class" when mlr3 asks for
      # "response"; "prob" is passed through unchanged.
      type = self$predict_type
      if (type == "response") type = "class"

      pred = invoke(predict, self$model, newdata = newdata, type = type, .args = pars)
      # Probabilities are handed back to mlr3 as a matrix.
      if (type == "prob") pred = as.matrix(pred)
      setNames(list(pred), self$predict_type)
    }
  )
)

.extralrns_dict$add("classif.mdeb", LearnerClassifMdeb)
75 changes: 75 additions & 0 deletions R/learner_sparsediscrim_classif_sdlda.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
#' @title Classification Shrinkage-based Diagonal Linear Discriminant Analysis Learner
#' @author annanzrv
#' @name mlr_learners_classif.sdlda
#'
#' @description
#' Shrinkage-based Diagonal Linear Discriminant Analysis classifier.
#' Type of Naive Bayes classifier that improves the estimation of the pooled variances by
#' using a shrinkage-based estimator of the pooled covariance matrix.
#' Calls [sparsediscrim::lda_shrink_cov()] from \CRANpkg{sparsediscrim}.
#'
#' @templateVar id classif.sdlda
#' @template learner
#'
#' @references
#' `r format_bib("Peng2009sdlda")`
#'
#' @template seealso_learner
#' @template example
#' @export
LearnerClassifSdlda = R6Class("LearnerClassifSdlda",
  inherit = LearnerClassif,
  public = list(
    #' @description
    #' Creates a new instance of this [R6][R6::R6Class] class.
    initialize = function() {
      # Both hyperparameters are tagged "train" and therefore forwarded to
      # `sparsediscrim::lda_shrink_cov()` in `.train()`.
      param_set = ps(
        num_alphas = p_int(default = 101, tags = "train"),
        prior = p_uty(default = NULL, tags = "train")
      )

      super$initialize(
        id = "classif.sdlda",
        packages = "sparsediscrim",
        feature_types = c("integer", "numeric"),
        predict_types = c("response", "prob"),
        param_set = param_set,
        properties = c("multiclass", "twoclass"),
        man = "mlr3extralearners::mlr_learners_classif.sdlda",
        label = "Shrinkage-based Diagonal Linear Discriminant Analysis"
      )
    }
  ),
  private = list(
    # Fits the model through the formula interface of `lda_shrink_cov()` and
    # returns the fitted object.
    .train = function(task) {
      # get parameters for training
      pars = self$param_set$get_values(tags = "train")

      formula = task$formula()
      data = task$data()

      invoke(
        sparsediscrim::lda_shrink_cov,
        formula = formula,
        data = data,
        .args = pars
      )
    },

    # Predicts on `task` and returns a named list with a single element named
    # after the active predict type ("response" or "prob").
    .predict = function(task) {
      # No parameter is currently tagged "predict"; kept so that predict-time
      # parameters added later are forwarded automatically.
      pars = self$param_set$get_values(tags = "predict")

      # get newdata and ensure same ordering in train and predict
      newdata = ordered_features(task, self)

      # The predict method is called with type = "class" when mlr3 asks for
      # "response"; "prob" is passed through unchanged.
      type = self$predict_type
      if (type == "response") type = "class"

      pred = invoke(predict, self$model, newdata = newdata, type = type, .args = pars)
      # Probabilities are handed back to mlr3 as a matrix.
      if (type == "prob") pred = as.matrix(pred)
      setNames(list(pred), self$predict_type)
    }
  )
)

.extralrns_dict$add("classif.sdlda", LearnerClassifSdlda)
1 change: 1 addition & 0 deletions man/mlr3extralearners-package.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion man/mlr_learners_classif.C50.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion man/mlr_learners_classif.abess.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading