Skip to content

Commit 4ed71f1

Browse files
committed
test: shrinking test-pipeline to reduce overall test-time on cran
1 parent b58bb74 commit 4ed71f1

17 files changed

Lines changed: 1034 additions & 1506 deletions

DESCRIPTION

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ VignetteBuilder:
4040
quarto
4141
Config/testthat/edition: 3
4242
Config/testthat/parallel: false
43-
Date/Publication: 2025-09-06 14:55:28.991938 UTC
43+
Date/Publication: 2025-09-08 06:54:14.612978 UTC
4444
Encoding: UTF-8
4545
Roxygen: list(markdown = TRUE)
4646
SystemRequirements: Quarto command line tools

NEWS.md

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,8 @@
66

77
#### Other changes
88

9+
- updated description and news.md
10+
([b58bb74](https://github.com/kapsner/mllrnrs/tree/b58bb74dca7b3d84f7e1d38cf7f252103a172459))
911
- updated pkg metadata
1012
([e9d8ed8](https://github.com/kapsner/mllrnrs/tree/e9d8ed8a6f28bdba4d46c08cb73f86d377e53811))
1113
- updated description and news.md
@@ -24,7 +26,7 @@
2426
([0b24c76](https://github.com/kapsner/mllrnrs/tree/0b24c762660896447ef8ee85991c54c510601078))
2527

2628
Full set of changes:
27-
[`v0.0.5...e9d8ed8`](https://github.com/kapsner/mllrnrs/compare/v0.0.5...e9d8ed8)
29+
[`v0.0.5...b58bb74`](https://github.com/kapsner/mllrnrs/compare/v0.0.5...b58bb74)
2830

2931
## v0.0.5 (2025-03-05)
3032

tests/testthat/test-binary.R

Lines changed: 365 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,365 @@
1+
# Shared fixtures for all tests in this file.
library(mlbench)
data("PimaIndiansDiabetes2")

# Convert to a data.table and drop every row that contains a missing value.
dataset <- na.omit(data.table::as.data.table(PimaIndiansDiabetes2))

seed <- 123
# The first eight columns are used as features.
feature_cols <- colnames(dataset)[seq_len(8L)]
9+
10+
# Candidate `alpha` values for glmnet: 0 to 1 in steps of 0.05.
param_list_glmnet <- expand.grid(alpha = seq(from = 0, to = 1, by = 0.05))
13+
14+
# Number of cores passed as `ncores` to the experiments below.
# - If `_R_CHECK_LIMIT_CORES_` parses to TRUE (treated here as "on cran"),
#   exactly 2 cores are used.
# - Otherwise the detected core count is clamped to the range [1, 4].
if (isTRUE(as.logical(Sys.getenv("_R_CHECK_LIMIT_CORES_")))) {
  # on cran
  ncores <- 2L
} else {
  # Query the core count once. detectCores() is documented to return NA when
  # the count cannot be determined; fall back to a single core in that case
  # so that `ncores` is never NA.
  detected_cores <- parallel::detectCores()
  if (is.na(detected_cores)) {
    detected_cores <- 1L
  }
  ncores <- max(1L, min(4L, detected_cores))
}
28+
29+
# Feature matrix without an intercept column (`-1`), built from the
# feature columns of `dataset`.
train_x <- model.matrix(
  object = ~ -1 + .,
  data = dataset[, .SD, .SDcols = feature_cols]
)
# Outcome as integer codes shifted by one, so the codes start at 0.
train_y <- as.integer(dataset[, get("diabetes")]) - 1L

options(mlexperiments.bayesian.max_init = 10L)

# Three stratified folds shared by every nested-CV test in this file.
fold_list <- splitTools::create_folds(
  y = train_y,
  k = 3,
  type = "stratified",
  seed = seed
)
43+
44+
45+
# ###########################################################################
46+
# %% glmnet
47+
# ###########################################################################
48+
49+
# ###########################################################################
50+
# %% NESTED CV
51+
# ###########################################################################
52+
53+
# Nested CV with grid search on the binary outcome using the glmnet learner.
# Skipped on CRAN.
test_that("test nested cv, grid, binary - glmnet", {
  skip_on_cran()

  glmnet_learner <- mllrnrs::LearnerGlmnet$new(
    metric_optimization_higher_better = FALSE
  )
  nested_cv <- mlexperiments::MLNestedCV$new(
    learner = glmnet_learner,
    strategy = "grid",
    fold_list = fold_list,
    k_tuning = 3L,
    ncores = ncores,
    seed = seed
  )

  # Evaluate only three randomly drawn rows of the full alpha grid.
  set.seed(seed)
  grid_rows <- sample(seq_len(nrow(param_list_glmnet)), size = 3)
  nested_cv$parameter_grid <- kdry::mlh_subset(param_list_glmnet, grid_rows)
  nested_cv$split_type <- "stratified"

  nested_cv$learner_args <- list(
    family = "binomial",
    type.measure = "class",
    standardize = TRUE
  )
  nested_cv$predict_args <- list(type = "response")
  nested_cv$performance_metric_args <- list(positive = "1", negative = "0")
  nested_cv$performance_metric <- mlexperiments::metric("AUC")

  # set data
  nested_cv$set_data(x = train_x, y = train_y)

  results <- nested_cv$execute()

  # Expect a list-typed table with 3 rows and 7 columns.
  expect_type(results, "list")
  expect_equal(dim(results), c(3, 7))
  expect_true(inherits(nested_cv$results, what = "mlexCV"))
})
104+
105+
# Same configuration as the glmnet grid test, except that `learner_args`
# carries no `family` entry; `execute()` is expected to raise an error.
test_that("test nested cv, grid - glmnet, errors", {
  glmnet_learner <- mllrnrs::LearnerGlmnet$new(
    metric_optimization_higher_better = FALSE
  )
  nested_cv <- mlexperiments::MLNestedCV$new(
    learner = glmnet_learner,
    strategy = "grid",
    fold_list = fold_list,
    k_tuning = 3L,
    ncores = ncores,
    seed = seed
  )

  # Evaluate only three randomly drawn rows of the full alpha grid.
  set.seed(seed)
  grid_rows <- sample(seq_len(nrow(param_list_glmnet)), size = 3)
  nested_cv$parameter_grid <- kdry::mlh_subset(param_list_glmnet, grid_rows)
  nested_cv$split_type <- "stratified"

  # NOTE(review): `family` is left out here; presumably this omission is
  # what triggers the expected error -- confirm against the learner.
  nested_cv$learner_args <- list(
    type.measure = "class",
    standardize = TRUE
  )
  nested_cv$predict_args <- list(type = "response")
  nested_cv$performance_metric_args <- list(positive = "1", negative = "0")
  nested_cv$performance_metric <- mlexperiments::metric("AUC")

  # set data
  nested_cv$set_data(x = train_x, y = train_y)

  expect_error(nested_cv$execute())
})
147+
148+
149+
# ###########################################################################
150+
# %% Lightgbm
151+
# ###########################################################################
152+
153+
# Full factorial grid of LightGBM parameters; `max_depth` and `verbose` are
# held constant at -1.
param_list_lightgbm <- expand.grid(
  bagging_fraction = seq(from = 0.6, to = 1, by = 0.2),
  feature_fraction = seq(from = 0.6, to = 1, by = 0.2),
  min_data_in_leaf = seq(from = 2, to = 10, by = 2),
  learning_rate = seq(from = 0.1, to = 0.2, by = 0.1),
  num_leaves = seq(from = 2, to = 20, by = 4),
  max_depth = -1L,
  verbose = -1L
)

# Options set for the LightGBM tests.
options(
  mlexperiments.bayesian.max_init = 10L,
  mlexperiments.optim.lgb.nrounds = 100L,
  mlexperiments.optim.lgb.early_stopping_rounds = 10L
)
166+
167+
# ###########################################################################
168+
# %% TUNING
169+
# ###########################################################################
170+
171+
# Lower/upper bound pair per LightGBM parameter; assigned to
# `parameter_bounds` in the bayesian test.
lightgbm_bounds <- list(
  bagging_fraction = c(0.2, 1),
  feature_fraction = c(0.2, 1),
  min_data_in_leaf = c(2L, 12L),
  learning_rate = c(0.1, 0.2),
  num_leaves = c(2L, 20L)
)

# Optimizer arguments assigned to `optim_args` in the bayesian test;
# `iters.n` is set to the file-level `ncores`.
optim_args <- list(iters.n = ncores, kappa = 3.5, acq = "ucb")
183+
184+
# ###########################################################################
185+
# %% NESTED CV
186+
# ###########################################################################
187+
188+
# Nested CV with the bayesian strategy on the binary outcome using the
# LightGBM learner.
test_that("test nested cv, bayesian, binary - lightgbm", {
  lightgbm_learner <- mllrnrs::LearnerLightgbm$new(
    metric_optimization_higher_better = FALSE
  )
  nested_cv <- mlexperiments::MLNestedCV$new(
    learner = lightgbm_learner,
    strategy = "bayesian",
    fold_list = fold_list,
    k_tuning = 3L,
    ncores = ncores,
    seed = seed
  )

  # Both the bounds and the full parameter grid are supplied.
  nested_cv$parameter_bounds <- lightgbm_bounds
  nested_cv$parameter_grid <- param_list_lightgbm
  nested_cv$split_type <- "stratified"
  nested_cv$optim_args <- optim_args

  nested_cv$learner_args <- list(
    objective = "binary",
    metric = "binary_logloss",
    cat_vars = c("pregnant", "pedigree")
  )
  nested_cv$performance_metric_args <- list(positive = "1", negative = "0")
  nested_cv$performance_metric <- mlexperiments::metric("auc")

  # set data
  nested_cv$set_data(x = train_x, y = train_y)

  results <- nested_cv$execute()

  # Expect a list-typed table with 3 rows and 12 columns.
  expect_type(results, "list")
  expect_equal(dim(results), c(3, 12))
  expect_true(inherits(nested_cv$results, what = "mlexCV"))
})
234+
235+
236+
# ###########################################################################
237+
# %% Ranger
238+
# ###########################################################################
239+
240+
241+
# Full factorial grid of ranger parameters.
param_list_ranger <- expand.grid(
  num.trees = seq(from = 500, to = 1000, by = 500),
  mtry = seq(from = 2, to = 6, by = 2),
  min.node.size = seq(from = 1, to = 9, by = 4),
  max.depth = seq(from = 1, to = 9, by = 4),
  sample.fraction = seq(from = 0.5, to = 0.8, by = 0.3)
)
248+
249+
# ###########################################################################
250+
# %% NESTED CV
251+
# ###########################################################################
252+
253+
# Nested CV with grid search on the binary outcome using the ranger learner.
test_that("test nested cv, grid, binary - ranger", {
  nested_cv <- mlexperiments::MLNestedCV$new(
    learner = mllrnrs::LearnerRanger$new(),
    strategy = "grid",
    fold_list = fold_list,
    k_tuning = 3L,
    ncores = ncores,
    seed = seed
  )

  # Evaluate only three randomly drawn rows of the full ranger grid.
  set.seed(seed)
  grid_rows <- sample(seq_len(nrow(param_list_ranger)), size = 3)
  nested_cv$parameter_grid <- param_list_ranger[grid_rows, ]
  nested_cv$split_type <- "stratified"

  nested_cv$learner_args <- list(
    probability = TRUE,
    cat_vars = c("pregnant", "pedigree")
  )
  nested_cv$predict_args <- list(prob = TRUE, positive = "1")

  nested_cv$performance_metric_args <- list(positive = "1", negative = "0")
  nested_cv$performance_metric <- mlexperiments::metric("AUC")

  # set data; the outcome is passed as a factor for this learner
  nested_cv$set_data(x = train_x, y = factor(train_y))

  results <- nested_cv$execute()

  # Expect a list-typed table with 3 rows and 8 columns.
  expect_type(results, "list")
  expect_equal(dim(results), c(3, 8))
  expect_true(inherits(nested_cv$results, what = "mlexCV"))
})
296+
297+
298+
# ###########################################################################
299+
# %% xgboost
300+
# ###########################################################################
301+
302+
# Full factorial grid of xgboost parameters.
param_list_xgboost <- expand.grid(
  subsample = seq(from = 0.6, to = 1, by = 0.2),
  colsample_bytree = seq(from = 0.6, to = 1, by = 0.2),
  min_child_weight = seq(from = 1, to = 5, by = 4),
  learning_rate = seq(from = 0.1, to = 0.2, by = 0.1),
  max_depth = seq(from = 1, to = 5, by = 4)
)

# NOTE(review): this overwrites the `ncores` value computed earlier in the
# file for everything that runs after this point -- confirm it is intended.
ncores <- 2L

# Options set for the xgboost tests.
options(
  mlexperiments.bayesian.max_init = 10L,
  mlexperiments.optim.xgb.nrounds = 100L,
  mlexperiments.optim.xgb.early_stopping_rounds = 10L
)
315+
316+
# ###########################################################################
317+
# %% NESTED CV
318+
# ###########################################################################
319+
320+
# Nested CV with grid search on the binary outcome using the xgboost learner
# with the "binary:logistic" objective.
test_that("test nested cv, grid, binary:logistic - xgboost", {
  xgboost_learner <- mllrnrs::LearnerXgboost$new(
    metric_optimization_higher_better = FALSE
  )
  nested_cv <- mlexperiments::MLNestedCV$new(
    learner = xgboost_learner,
    strategy = "grid",
    fold_list = fold_list,
    k_tuning = 3L,
    ncores = ncores,
    seed = seed
  )

  # Evaluate only three randomly drawn rows of the full xgboost grid.
  set.seed(seed)
  grid_rows <- sample(seq_len(nrow(param_list_xgboost)), size = 3)
  nested_cv$parameter_grid <- param_list_xgboost[grid_rows, ]
  nested_cv$split_type <- "stratified"

  nested_cv$learner_args <- list(
    objective = "binary:logistic",
    eval_metric = "logloss"
  )
  nested_cv$performance_metric_args <- list(positive = "1", negative = "0")
  nested_cv$performance_metric <- mlexperiments::metric("auc")

  # set data
  nested_cv$set_data(x = train_x, y = train_y)

  results <- nested_cv$execute()

  # Expect a list-typed table with 3 rows and 10 columns.
  expect_type(results, "list")
  expect_equal(dim(results), c(3, 10))
  expect_true(inherits(nested_cv$results, what = "mlexCV"))
})
365+

0 commit comments

Comments
 (0)