diff --git a/.Rbuildignore b/.Rbuildignore index 0feda229a..cd8350605 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -23,4 +23,6 @@ ^cran-comments\.md$ ^CRAN-SUBMISSION$ ^benchmark$ +^attic$ ^.cursor$ + diff --git a/.gitignore b/.gitignore index 4be7bfc62..dcf4d2fc1 100644 --- a/.gitignore +++ b/.gitignore @@ -183,3 +183,4 @@ revdep/ # misc Meta/ Rplots.pdf +.cursor/ diff --git a/DESCRIPTION b/DESCRIPTION index c79f908d6..b41b9c50b 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -67,12 +67,15 @@ Suggests: codetools, datasets, future.callr, + mirai (>= 2.3.0), mlr3data, progressr, remotes, RhpcBLASctl, rpart, testthat (>= 3.2.0) +Remotes: + mlr-org/mlr3misc Encoding: UTF-8 Config/testthat/edition: 3 Config/testthat/parallel: false diff --git a/R/Learner.R b/R/Learner.R index 26722c664..6e00a9437 100644 --- a/R/Learner.R +++ b/R/Learner.R @@ -536,6 +536,11 @@ Learner = R6Class("Learner", #' * `"callr"`: Uses the package \CRANpkg{callr} to call the learner, measure time and do the logging. #' This encapsulation spawns a separate R session in which the learner is called. #' While this comes with a considerable overhead, it also guards your session from being teared down by segfaults. + #' * `"mirai"`: Uses the package \CRANpkg{mirai} to call the learner, measure time and do the logging. + #' This encapsulation calls the function in a `mirai` on a `daemon`. + #' The `daemon` can be pre-started via `daemons(1)`; otherwise, a new R session will be created for each encapsulated call. + #' If a `daemon` is already running, it will be used to execute all calls. + #' Using `"mirai"` is as safe as `callr` but much faster if several learners are encapsulated one after the other on the same daemon. #' #' The fallback learner is fitted to create valid predictions in case that either the model fitting or the prediction of the original learner fails. #' If the training step or the predict step of the original learner fails, the fallback is used to make the predictions. @@ -554,7 +559,7 @@ Learner = R6Class("Learner", #' #' @return `self` (invisibly). encapsulate = function(method, fallback = NULL) { - assert_choice(method, c("none", "try", "evaluate", "callr")) + assert_choice(method, c("none", "try", "evaluate", "callr", "mirai")) if (method != "none") { assert_learner(fallback, task_type = self$task_type) diff --git a/R/helper_exec.R b/R/helper_exec.R index cc3f95296..6868aecbc 100644 --- a/R/helper_exec.R +++ b/R/helper_exec.R @@ -23,6 +23,9 @@ future_map = function(n, FUN, ..., MoreArgs = list()) { if (getOption("mlr3.debug", FALSE)) { lg$info("Running experiments sequentially in debug mode with %i iterations", n) mapply(FUN, ..., MoreArgs = MoreArgs, SIMPLIFY = FALSE, USE.NAMES = FALSE) + } else if (requireNamespace("mirai", quietly = TRUE) && mirai::daemons_set()) { + lg$debug("Running resample() via mirai with %i iterations", n) + mirai::collect_mirai(mirai::mirai_map(data.table(...), FUN, .args = c(MoreArgs, list(is_sequential = FALSE)))) } else { is_sequential = inherits(plan(), "sequential") scheduling = if (!is_sequential && isTRUE(getOption("mlr3.exec_random", TRUE))) structure(TRUE, ordering = "random") else TRUE diff --git a/inst/testthat/helper_misc.R b/inst/testthat/helper_misc.R index 5b17636de..cff6d3244 100644 --- a/inst/testthat/helper_misc.R +++ b/inst/testthat/helper_misc.R @@ -17,6 +17,14 @@ with_future = function(backend, expr, ...)
{ force(expr) } +with_mirai = function(expr) { + requireNamespace("mirai") + mirai::daemons(1) + on.exit(mirai::daemons(0), add = TRUE) + force(expr) + expect_true(mirai::status()$mirai["completed"] > 0) +} + private = function(x) { x[[".__enclos_env__"]][["private"]] } diff --git a/man-roxygen/section_parallelization.R b/man-roxygen/section_parallelization.R index 6c4fed578..c419d4bef 100644 --- a/man-roxygen/section_parallelization.R +++ b/man-roxygen/section_parallelization.R @@ -1,8 +1,11 @@ #' @section Parallelization: #' -#' This function can be parallelized with the \CRANpkg{future} package. -#' One job is one resampling iteration, and all jobs are send to an apply function -#' from \CRANpkg{future.apply} in a single batch. +#' This function can be parallelized with the \CRANpkg{future} or \CRANpkg{mirai} package. +#' One job is one resampling iteration. +#' All jobs are sent to an apply function from \CRANpkg{future.apply} or `mirai::mirai_map()` in a single batch. #' To select a parallel backend, use [future::plan()]. +#' To use `mirai`, call `mirai::daemons()` before calling this function. +#' The `future` package guarantees reproducible results independent of the parallel backend. +#' The results obtained with `mirai` will not match those obtained with `future`, but can be made reproducible by setting a `seed` and `dispatcher = FALSE` when calling `mirai::daemons()`. #' More on parallelization can be found in the book: #' \url{https://mlr3book.mlr-org.com/chapters/chapter10/advanced_technical_aspects_of_mlr3.html} diff --git a/man/Learner.Rd b/man/Learner.Rd index 96027fbd9..b1358ddac 100644 --- a/man/Learner.Rd +++ b/man/Learner.Rd @@ -638,6 +638,11 @@ Output is printed to the console and not logged. \item \code{"callr"}: Uses the package \CRANpkg{callr} to call the learner, measure time and do the logging. This encapsulation spawns a separate R session in which the learner is called. While this comes with a considerable overhead, it also guards your session from being teared down by segfaults. +\item \code{"mirai"}: Uses the package \CRANpkg{mirai} to call the learner, measure time and do the logging. +This encapsulation calls the function in a \code{mirai} on a \code{daemon}. +The \code{daemon} can be pre-started via \code{daemons(1)}; otherwise, a new R session will be created for each encapsulated call. +If a \code{daemon} is already running, it will be used to execute all calls. +Using \code{"mirai"} is as safe as \code{callr} but much faster if several learners are encapsulated one after the other on the same daemon. } The fallback learner is fitted to create valid predictions in case that either the model fitting or the prediction of the original learner fails. diff --git a/man/benchmark.Rd b/man/benchmark.Rd index 23e9063c7..6472e81c8 100644 --- a/man/benchmark.Rd +++ b/man/benchmark.Rd @@ -100,10 +100,13 @@ The (list of) created measures can finally be passed to \verb{$aggregate()} or \ \section{Parallelization}{ -This function can be parallelized with the \CRANpkg{future} package. -One job is one resampling iteration, and all jobs are send to an apply function -from \CRANpkg{future.apply} in a single batch. +This function can be parallelized with the \CRANpkg{future} or \CRANpkg{mirai} package. +One job is one resampling iteration. +All jobs are sent to an apply function from \CRANpkg{future.apply} or \code{mirai::mirai_map()} in a single batch. To select a parallel backend, use \code{\link[future:plan]{future::plan()}}.
+To use \code{mirai}, call \code{mirai::daemons()} before calling this function. +The \code{future} package guarantees reproducible results independent of the parallel backend. +The results obtained with \code{mirai} will not match those obtained with \code{future}, but can be made reproducible by setting a \code{seed} and \code{dispatcher = FALSE} when calling \code{mirai::daemons()}. More on parallelization can be found in the book: \url{https://mlr3book.mlr-org.com/chapters/chapter10/advanced_technical_aspects_of_mlr3.html} } diff --git a/man/resample.Rd b/man/resample.Rd index d5e10af0d..1304ff283 100644 --- a/man/resample.Rd +++ b/man/resample.Rd @@ -101,10 +101,13 @@ The (list of) created measures can finally be passed to \verb{$aggregate()} or \ \section{Parallelization}{ -This function can be parallelized with the \CRANpkg{future} package. -One job is one resampling iteration, and all jobs are send to an apply function -from \CRANpkg{future.apply} in a single batch. +This function can be parallelized with the \CRANpkg{future} or \CRANpkg{mirai} package. +One job is one resampling iteration. +All jobs are sent to an apply function from \CRANpkg{future.apply} or \code{mirai::mirai_map()} in a single batch. To select a parallel backend, use \code{\link[future:plan]{future::plan()}}. +To use \code{mirai}, call \code{mirai::daemons()} before calling this function. +The \code{future} package guarantees reproducible results independent of the parallel backend. +The results obtained with \code{mirai} will not match those obtained with \code{future}, but can be made reproducible by setting a \code{seed} and \code{dispatcher = FALSE} when calling \code{mirai::daemons()}. More on parallelization can be found in the book: \url{https://mlr3book.mlr-org.com/chapters/chapter10/advanced_technical_aspects_of_mlr3.html} } diff --git a/tests/testthat/test_parallel.R b/tests/testthat/test_parallel_future.R similarity index 100% rename from tests/testthat/test_parallel.R rename to tests/testthat/test_parallel_future.R diff --git a/tests/testthat/test_parallel_mirai.R b/tests/testthat/test_parallel_mirai.R new file mode 100644 index 000000000..261107816 --- /dev/null +++ b/tests/testthat/test_parallel_mirai.R @@ -0,0 +1,56 @@ +skip_if_not_installed("mirai") + +test_that("parallel resample", { + with_mirai({ + task = tsk("pima") + learner = lrn("classif.rpart") + rr = resample(task, learner, rsmp("cv", folds = 3)) + expect_resample_result(rr) + expect_data_table(rr$errors, nrows = 0L) + }) +}) + +test_that("parallel benchmark", { + task = tsk("pima") + learner = lrn("classif.rpart") + + with_mirai({ + bmr = benchmark(benchmark_grid(task, learner, rsmp("cv", folds = 3))) + }) + expect_benchmark_result(bmr) + expect_equal(bmr$aggregate(conditions = TRUE)$warnings, 0L) + expect_equal(bmr$aggregate(conditions = TRUE)$errors, 0L) +}) + +test_that("real parallel resample", { + with_mirai({ + task = tsk("pima") + learner = lrn("classif.rpart") + rr = resample(task, learner, rsmp("cv", folds = 3)) + + expect_resample_result(rr) + expect_data_table(rr$errors, nrows = 0L) + }) +}) + +test_that("data table threads are not changed in main session", { + skip_on_os("mac") # number of threads cannot be changed on mac + skip_on_cran() + + old_dt_threads = getDTthreads() + on.exit({ + setDTthreads(old_dt_threads) + }, add = TRUE) + setDTthreads(2L) + + task = tsk("sonar") + learner = lrn("classif.debug", predict_type = "prob") + resampling = rsmp("cv", folds = 3L) + measure = msr("classif.auc") + + rr1 = with_seed(123, resample(task, learner, resampling)) + expect_equal(getDTthreads(), 2L) + + rr2 =
with_seed(123, with_mirai(resample(task, learner, resampling))) + expect_equal(getDTthreads(), 2L) +})
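
Usage sketch (not part of the patch): a minimal example of how the mirai support added above is expected to be used, based on the documentation changes in this diff. The exact call pattern and the `seed`/`dispatcher` arguments to `mirai::daemons()` follow the wording of the added docs and are assumptions, not a definitive recipe.

library(mlr3)

# Assumed setup: start two persistent daemons. With daemons set, resample()/benchmark()
# dispatch jobs via mirai::mirai_map() instead of future.apply.
# seed + dispatcher = FALSE is what the docs above describe for reproducible results.
mirai::daemons(2, dispatcher = FALSE, seed = 123)

task = tsk("pima")
learner = lrn("classif.rpart")

# "mirai" encapsulation runs train/predict on a daemon, similar to "callr"
# but reusing the running daemon instead of spawning a fresh R session per call.
learner$encapsulate("mirai", fallback = lrn("classif.featureless"))

rr = resample(task, learner, rsmp("cv", folds = 3))
rr$aggregate()

# Shut the daemons down when done.
mirai::daemons(0)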