mlr-org
diff --git a/‎DESCRIPTION‎
Lines changed: 3 additions & 3 deletions b/‎DESCRIPTION‎
Lines changed: 3 additions & 3 deletions
diff --git a/‎NEWS.md‎
Lines changed: 2 additions & 0 deletions b/‎NEWS.md‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎R/OMLCollection.R‎
Lines changed: 3 additions & 22 deletions b/‎R/OMLCollection.R‎
Lines changed: 3 additions & 22 deletions
diff --git a/‎R/OMLData.R‎
Lines changed: 2 additions & 1 deletion b/‎R/OMLData.R‎
Lines changed: 2 additions & 1 deletion
diff --git a/‎R/benchmark_grid_oml.R‎
Lines changed: 11 additions & 11 deletions b/‎R/benchmark_grid_oml.R‎
Lines changed: 11 additions & 11 deletions
diff --git a/‎R/list_oml_data.R‎
Lines changed: 22 additions & 19 deletions b/‎R/list_oml_data.R‎
Lines changed: 22 additions & 19 deletions
diff --git a/‎R/list_oml_tasks.R‎
Lines changed: 1 addition & 1 deletion b/‎R/list_oml_tasks.R‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎R/utils.R‎
Lines changed: 2 additions & 2 deletions b/‎R/utils.R‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎R/zzz.R‎
Lines changed: 3 additions & 2 deletions b/‎R/zzz.R‎
Lines changed: 3 additions & 2 deletions
diff --git a/‎cran-comments.md‎
Lines changed: 3 additions & 17 deletions b/‎cran-comments.md‎
Lines changed: 3 additions & 17 deletions
@@ -1,6 +1,6 @@
 Package: mlr3oml
 Title: Connector Between 'mlr3' and 'OpenML'
-Version: 0.6.0-9000
+Version: 0.7.0
 Authors@R:
     c(person("Michel", "Lang", , "[email protected]", role = "aut",
              comment = c(ORCID = "0000-0001-9754-0393")),
@@ -33,7 +33,7 @@ Imports:
     withr
 Suggests:
     DBI,
-    duckdb,
+    duckdb (>= 0.6.0),
     mlr3db (>= 0.5.0),
     qs,
     RWeka,
@@ -42,4 +42,4 @@ Config/testthat/edition: 3
 Encoding: UTF-8
 NeedsCompilation: yes
 Roxygen: list(markdown = TRUE)
-RoxygenNote: 7.2.1
+RoxygenNote: 7.2.2
@@ -1,5 +1,7 @@
 # mlr3oml 0.7.0
 
+* feature: Add argument `type` to function `list_oml_tasks()` to filter by task type.
+* fix: strings and nominals are distinguished for parquet files
 * docs: Fixed some OpenML links
 * docs: Renamed the docs for OpenML objects
 * Renamed the sugar functions from:
 
@@ -44,32 +44,13 @@
 #'   run_collection = OMLCollection$new(id = 232)
 #'   # using sugar
 #'   run_collection = ocl(id = 232)
-#'   run_collection$main_entity_type
-#'   run_collection$tasks
-#'   run_collection$data
-#'   run_collection$flows
-#'   run_collection$runs
+#'   print(run_collection)
 #'
-#'   # mlr3 conversion:
-#'   tasks = as_tasks(run_collection)
-#'   resamplings = as_resamplings(run_collection)
-#'   learners = as_learners(run_collection, "classif")
-#'
-#'   bmr = as_benchmark_result(run_collection)
-#'   bmr$score(msr("classif.ce"))
-#'
-#'   # OpenML task collection
+#'   # OpenML task collection:
 #'   task_collection = OMLCollection$new(id = 258)
 #'   # using sugar
 #'   task_collection = ocl(id = 258)
-#'
-#'   task_collection$main_entity_type
-#'   task_collection$tasks
-#'   task_collection$data
-#'
-#'   # mlr3 conversion
-#'   tasks = as_tasks(task_collection)
-#'   resamplings = as_resamplings(task_collection)
+#'   print(task_collection)
 #'   }, silent = TRUE)
 OMLCollection = R6Class("OMLCollection",
   inherit = OMLObject,
 
@@ -233,7 +233,8 @@ OMLData = R6Class("OMLData",
         if (inherits(path, "try-error")) {
           lg$info("Failed to download parquet, trying arff.", id = self$id)
         } else {
-          backend = try(as_duckdb_backend_character(path, primary_key = primary_key), silent = TRUE)
+          factors = self$features[get("data_type") == "nominal", "name"][[1L]]
+          backend = try(as_duckdb_backend_character(path, primary_key = primary_key, factors = factors), silent = TRUE)
           if (inherits(backend, "try-error")) {
             msg = paste(
               "Parquet available but failed to create backend, reverting to arff.",
 
@@ -9,17 +9,17 @@
 #' @param resamplings (`list()` or `Resampling`) A list of [mlr3::Resampling]s that are instantiated on the given tasks.
 #'
 #' @examples
-#' \donttest{
-#' library("mlr3")
-#' collection = OMLCollection$new(258)
-#' otasks = collection$tasks[1:2, ][["task"]]
-#' tasks = as_tasks(otasks)
-#' resamplings = as_resamplings(otasks)
-#' learners = lrns(c("classif.rpart", "classif.featureless"))
-#' design = benchmark_grid_oml(tasks, learners, resamplings)
-#' print(design)
-#' bmr = benchmark(design)
-#' }
+#' try({
+#'   library("mlr3")
+#'   collection = OMLCollection$new(258)
+#'   otasks = collection$tasks[1:2, ][["task"]]
+#'   tasks = as_tasks(otasks)
+#'   resamplings = as_resamplings(otasks)
+#'   learners = lrns(c("classif.rpart", "classif.featureless"))
+#'   design = benchmark_grid_oml(tasks, learners, resamplings)
+#'   print(design)
+#'   bmr = benchmark(design)
+#' }, silent = TRUE)
 #' @return ([`data.table()`])
 #' @export
 benchmark_grid_oml = function(tasks, learners, resamplings) {
 
@@ -7,6 +7,9 @@
 #' This function allows querying data sets, tasks, flows, setups, runs, and evaluation measures
 #' from \url{https://www.openml.org/search?type=data&sort=runs&status=active} using some simple filter criteria.
 #'
+#' To find datasets for a specific task type, use [`list_oml_tasks()`] which supports filtering according to the task
+#' type.
+#'
 #' @details
 #' Filter values are usually provided as single atomic values (typically integer or character).
 #' Provide a numeric vector of length 2 (`c(l, u)`) to find matches in the range \eqn{[l, u]}.
@@ -44,29 +47,29 @@
 #'
 #' @export
 #' @examples
-#' \donttest{
-#' ### query data sets
-#' # search for titanic data set
-#' data_sets = list_oml_data(data_name = "titanic")
-#' print(data_sets)
+#' try({
+#'   ### query data sets
+#'   # search for titanic data set
+#'   data_sets = list_oml_data(data_name = "titanic")
+#'   print(data_sets)
 #'
-#' # search for a reduced version
-#' data_sets = list_oml_data(
-#'   data_name = "titanic",
-#'   number_instances = c(2200, 2300),
-#'   number_features = 4
-#' )
-#' print(data_sets)
+#'   # search for a reduced version
+#'   data_sets = list_oml_data(
+#'     data_name = "titanic",
+#'     number_instances = c(2200, 2300),
+#'     number_features = 4
+#'   )
+#'   print(data_sets)
 #'
-#' ### search tasks for this data set
-#' tasks = list_oml_tasks(data_id = data_sets$data_id)
-#' print(tasks)
+#'   ### search tasks for this data set
+#'   tasks = list_oml_tasks(data_id = data_sets$data_id)
+#'   print(tasks)
 #'
 #'
-#' # query runs, group by number of runs per task_id
-#' runs = list_oml_runs(task_id = tasks$task_id)
-#' runs[, .N, by = task_id]
-#' }
+#'   # query runs, group by number of runs per task_id
+#'   runs = list_oml_runs(task_id = tasks$task_id)
+#'   runs[, .N, by = task_id]
+#' }, silent = TRUE)
 list_oml_data = function(data_id = NULL, data_name = NULL, number_instances = NULL, number_features = NULL,
   number_classes = NULL, number_missing_values = NULL, tag = NULL, limit = limit_default(),
   test_server = test_server_default(), ...) {
 
@@ -2,7 +2,7 @@
 #' @param task_id (`integer()`)\cr
 #'   Vector of task ids to restrict to.
 #' @param type (`character(1)`)\cr
-#'   The task type, supported values are: clasisf, regr, surv and clust.
+#'   The task type, supported values are: `"classif"`, `"regr"`, `"surv"` and `"clust"`.
 #' @export
 list_oml_tasks = function(task_id = NULL, data_id = NULL, number_instances = NULL, number_features = NULL,
   number_classes = NULL, number_missing_values = NULL, tag = NULL, limit = limit_default(),
 
@@ -60,7 +60,7 @@ transpose_name_value = function(li, as_integer = FALSE) {
 }
 
 # remove this when it is merged in mlr3db (... in mlr3db is not passed to duckdb constructor...)
-as_duckdb_backend_character = function(data, primary_key = NULL) {
+as_duckdb_backend_character = function(data, primary_key = NULL, factors) {
   require_namespaces(c("DBI", "duckdb", "mlr3db"))
 
   assert_file_exists(data, access = "r", extension = "parquet")
@@ -123,7 +123,7 @@ as_duckdb_backend_character = function(data, primary_key = NULL) {
   }
 
   backend = mlr3db::DataBackendDuckDB$new(con, table = tbl, primary_key = primary_key,
-    strings_as_factors = TRUE
+    strings_as_factors = factors
   )
 
   on.exit()
 
@@ -39,7 +39,7 @@
 #' **Relevant for developers**
 #'
 #' * `mlr3oml.test_server`:
-#'   The default value for whether to use the OpenML [test server](https://test.openml.org/).
+#'   The default value for whether to use the OpenML test server.
 #'   Default is `FALSE`.
 #' * `mlr3oml.test_api_key`:
 #'   API key to use for the test server. If not set, defaults to the value of the environment
@@ -82,7 +82,8 @@ utils::globalVariables(c("super"))
   ResampleResult$private_fields$oml = NULL
   BenchmarkResult$private_fields$oml = NULL
   library.dynam.unload("mlr3oml", libpath)
+  mlr_tasks$remove("oml")
+  mlr_resamplings$remove("oml")
 } # nocov end
 
-
 leanify_package()
@@ -2,21 +2,7 @@
 
 None
 
-## R CMD check results
+## Comment
 
-There is only one note that informs about the maintainer change.
-
-Maintainer: 'Sebastian Fischer <[email protected]>'
-
-New maintainer:
-  Sebastian Fischer <[email protected]>
-Old maintainer(s):
-  Michel Lang <[email protected]>
-
-
-## Comments
-
-This package uses a REST API and therefore:
-
-* wraps examples in "\dontrun{...}"
-* disables tests relying on an existing server and internet connection on CRAN
+This release fixes the CRAN NOTE that arose because examples did not fail gracefully when the OpenML server
+was not available.
Original file line number	Diff line number	Diff line change
`@@ -60,7 +60,7 @@ transpose_name_value = function(li, as_integer = FALSE) {`
`60`	`60`	`}`
`61`	`61`
`62`	`62`	`# remove this when it is merged in mlr3db (... in mlr3db is not passed to duckdb constructor...)`
`63`		`-as_duckdb_backend_character = function(data, primary_key = NULL) {`
	`63`	`+as_duckdb_backend_character = function(data, primary_key = NULL, factors) {`
`64`	`64`	`require_namespaces(c("DBI", "duckdb", "mlr3db"))`
`65`	`65`
`66`	`66`	`assert_file_exists(data, access = "r", extension = "parquet")`
`@@ -123,7 +123,7 @@ as_duckdb_backend_character = function(data, primary_key = NULL) {`
`123`	`123`	`}`
`124`	`124`
`125`	`125`	`backend = mlr3db::DataBackendDuckDB$new(con, table = tbl, primary_key = primary_key,`
`126`		`- strings_as_factors = TRUE`
	`126`	`+ strings_as_factors = factors`
`127`	`127`	`)`
`128`	`128`
`129`	`129`	`on.exit()`