Skip to content

Commit f7264f1

Browse files
authored
Version 0.7.0 (#93)
* docs: better naming for oml object help pages * docs: fix openml links * BREAKING CHANGE: Rename sugar functions To be more consistent with the rest of mlr3, we renamed: * `oml_data` --> `odt` * `oml_task` --> `otsk` * `oml_flow` --> `oflw` * `oml_run` --> `orn` * `oml_collection` --> `ocl` * ci: only run ci once every week * improve filtering tasks according to task type! * docs: knit readme * fix: examples fail gracefully when OpenML is busy * docs: typos in NEWS * chore: change error message * typo * * fix: strings and nominals are distinguished for parquet files * unload tasks and resamplings * remove test (openml bug was fixed) * docs: improve docu * update required duckdb version duckdb/duckdb#4806 * prepare for CRAN release * fix: wrap example in try statement (CRAN issue) * typo * safely escape example * update cran-comments * fix CRAN NOTE: too long runtime of example * document
1 parent a0bb0e5 commit f7264f1

22 files changed

+115
-160
lines changed

DESCRIPTION

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
Package: mlr3oml
22
Title: Connector Between 'mlr3' and 'OpenML'
3-
Version: 0.6.0-9000
3+
Version: 0.7.0
44
Authors@R:
55
c(person("Michel", "Lang", , "[email protected]", role = "aut",
66
comment = c(ORCID = "0000-0001-9754-0393")),
@@ -33,7 +33,7 @@ Imports:
3333
withr
3434
Suggests:
3535
DBI,
36-
duckdb,
36+
duckdb (>= 0.6.0),
3737
mlr3db (>= 0.5.0),
3838
qs,
3939
RWeka,
@@ -42,4 +42,4 @@ Config/testthat/edition: 3
4242
Encoding: UTF-8
4343
NeedsCompilation: yes
4444
Roxygen: list(markdown = TRUE)
45-
RoxygenNote: 7.2.1
45+
RoxygenNote: 7.2.2

NEWS.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
# mlr3oml 0.7.0
22

3+
* feature: Add argument `task_type` to function `list_oml_tasks()`.
4+
* fix: Strings and nominals are now distinguished for parquet files.
35
* docs: Fixed some OpenML links
46
* docs: Renamed the docs for OpenML objects
57
* Renamed the sugar functions from:

R/OMLCollection.R

Lines changed: 3 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -44,32 +44,13 @@
4444
#' run_collection = OMLCollection$new(id = 232)
4545
#' # using sugar
4646
#' run_collection = ocl(id = 232)
47-
#' run_collection$main_entity_type
48-
#' run_collection$tasks
49-
#' run_collection$data
50-
#' run_collection$flows
51-
#' run_collection$runs
47+
#' print(run_collection)
5248
#'
53-
#' # mlr3 conversion:
54-
#' tasks = as_tasks(run_collection)
55-
#' resamplings = as_resamplings(run_collection)
56-
#' learners = as_learners(run_collection, "classif")
57-
#'
58-
#' bmr = as_benchmark_result(run_collection)
59-
#' bmr$score(msr("classif.ce"))
60-
#'
61-
#' # OpenML task collection
49+
#' # OpenML task collection:
6250
#' task_collection = OMLCollection$new(id = 258)
6351
#' # using sugar
6452
#' task_collection = ocl(id = 258)
65-
#'
66-
#' task_collection$main_entity_type
67-
#' task_collection$tasks
68-
#' task_collection$data
69-
#'
70-
#' # mlr3 conversion
71-
#' tasks = as_tasks(task_collection)
72-
#' resamplings = as_resamplings(task_collection)
53+
#' print(task_collection)
7354
#' }, silent = TRUE)
7455
OMLCollection = R6Class("OMLCollection",
7556
inherit = OMLObject,

R/OMLData.R

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -233,7 +233,8 @@ OMLData = R6Class("OMLData",
233233
if (inherits(path, "try-error")) {
234234
lg$info("Failed to download parquet, trying arff.", id = self$id)
235235
} else {
236-
backend = try(as_duckdb_backend_character(path, primary_key = primary_key), silent = TRUE)
236+
factors = self$features[get("data_type") == "nominal", "name"][[1L]]
237+
backend = try(as_duckdb_backend_character(path, primary_key = primary_key, factors = factors), silent = TRUE)
237238
if (inherits(backend, "try-error")) {
238239
msg = paste(
239240
"Parquet available but failed to create backend, reverting to arff.",

R/benchmark_grid_oml.R

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -9,17 +9,17 @@
99
#' @param resamplings (`list()` or `Resampling`) A list of [mlr3::Resampling]s that are instantiated on the given tasks.
1010
#'
1111
#' @examples
12-
#' \donttest{
13-
#' library("mlr3")
14-
#' collection = OMLCollection$new(258)
15-
#' otasks = collection$tasks[1:2, ][["task"]]
16-
#' tasks = as_tasks(otasks)
17-
#' resamplings = as_resamplings(otasks)
18-
#' learners = lrns(c("classif.rpart", "classif.featureless"))
19-
#' design = benchmark_grid_oml(tasks, learners, resamplings)
20-
#' print(design)
21-
#' bmr = benchmark(design)
22-
#' }
12+
#' try({
13+
#' library("mlr3")
14+
#' collection = OMLCollection$new(258)
15+
#' otasks = collection$tasks[1:2, ][["task"]]
16+
#' tasks = as_tasks(otasks)
17+
#' resamplings = as_resamplings(otasks)
18+
#' learners = lrns(c("classif.rpart", "classif.featureless"))
19+
#' design = benchmark_grid_oml(tasks, learners, resamplings)
20+
#' print(design)
21+
#' bmr = benchmark(design)
22+
#' }, silent = TRUE)
2323
#' @return ([`data.table()`])
2424
#' @export
2525
benchmark_grid_oml = function(tasks, learners, resamplings) {

R/list_oml_data.R

Lines changed: 22 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,9 @@
77
#' This function allows to query data sets, tasks, flows, setups, runs, and evaluation measures
88
#' from \url{https://www.openml.org/search?type=data&sort=runs&status=active} using some simple filter criteria.
99
#'
10+
#' To find datasets for a specific task type, use [`list_oml_tasks()`], which supports filtering according to the task
11+
#' type.
12+
#'
1013
#' @details
1114
#' Filter values are usually provided as single atomic values (typically integer or character).
1215
#' Provide a numeric vector of length 2 (`c(l, u)`) to find matches in the range \eqn{[l, u]}.
@@ -44,29 +47,29 @@
4447
#'
4548
#' @export
4649
#' @examples
47-
#' \donttest{
48-
#' ### query data sets
49-
#' # search for titanic data set
50-
#' data_sets = list_oml_data(data_name = "titanic")
51-
#' print(data_sets)
50+
#' try({
51+
#' ### query data sets
52+
#' # search for titanic data set
53+
#' data_sets = list_oml_data(data_name = "titanic")
54+
#' print(data_sets)
5255
#'
53-
#' # search for a reduced version
54-
#' data_sets = list_oml_data(
55-
#' data_name = "titanic",
56-
#' number_instances = c(2200, 2300),
57-
#' number_features = 4
58-
#' )
59-
#' print(data_sets)
56+
#' # search for a reduced version
57+
#' data_sets = list_oml_data(
58+
#' data_name = "titanic",
59+
#' number_instances = c(2200, 2300),
60+
#' number_features = 4
61+
#' )
62+
#' print(data_sets)
6063
#'
61-
#' ### search tasks for this data set
62-
#' tasks = list_oml_tasks(data_id = data_sets$data_id)
63-
#' print(tasks)
64+
#' ### search tasks for this data set
65+
#' tasks = list_oml_tasks(data_id = data_sets$data_id)
66+
#' print(tasks)
6467
#'
6568
#'
66-
#' # query runs, group by number of runs per task_id
67-
#' runs = list_oml_runs(task_id = tasks$task_id)
68-
#' runs[, .N, by = task_id]
69-
#' }
69+
#' # query runs, group by number of runs per task_id
70+
#' runs = list_oml_runs(task_id = tasks$task_id)
71+
#' runs[, .N, by = task_id]
72+
#' }, silent = TRUE)
7073
list_oml_data = function(data_id = NULL, data_name = NULL, number_instances = NULL, number_features = NULL,
7174
number_classes = NULL, number_missing_values = NULL, tag = NULL, limit = limit_default(),
7275
test_server = test_server_default(), ...) {

R/list_oml_tasks.R

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
#' @param task_id (`integer()`)\cr
33
#' Vector of task ids to restrict to.
44
#' @param type (`character(1)`)\cr
5-
#' The task type, supported values are: clasisf, regr, surv and clust.
5+
#' The task type, supported values are: `"classif"`, `"regr"`, `"surv"` and `"clust"`.
66
#' @export
77
list_oml_tasks = function(task_id = NULL, data_id = NULL, number_instances = NULL, number_features = NULL,
88
number_classes = NULL, number_missing_values = NULL, tag = NULL, limit = limit_default(),

R/utils.R

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,7 @@ transpose_name_value = function(li, as_integer = FALSE) {
6060
}
6161

6262
# remove this when it is merged in mlr3db (... in mlr3db is not passed to duckdb constructor...)
63-
as_duckdb_backend_character = function(data, primary_key = NULL) {
63+
as_duckdb_backend_character = function(data, primary_key = NULL, factors) {
6464
require_namespaces(c("DBI", "duckdb", "mlr3db"))
6565

6666
assert_file_exists(data, access = "r", extension = "parquet")
@@ -123,7 +123,7 @@ as_duckdb_backend_character = function(data, primary_key = NULL) {
123123
}
124124

125125
backend = mlr3db::DataBackendDuckDB$new(con, table = tbl, primary_key = primary_key,
126-
strings_as_factors = TRUE
126+
strings_as_factors = factors
127127
)
128128

129129
on.exit()

R/zzz.R

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@
3939
#' **Relevant for developers**
4040
#'
4141
#' * `mlr3oml.test_server`:
42-
#' The default value for whether to use the OpenML [test server](https://test.openml.org/).
42+
#' The default value for whether to use the OpenML test server.
4343
#' Default is `FALSE`.
4444
#' * `mlr3oml.test_api_key`:
4545
#' API key to use for the test server. If not set, defaults to the value of the environment
@@ -82,7 +82,8 @@ utils::globalVariables(c("super"))
8282
ResampleResult$private_fields$oml = NULL
8383
BenchmarkResult$private_fields$oml = NULL
8484
library.dynam.unload("mlr3oml", libpath)
85+
mlr_tasks$remove("oml")
86+
mlr_resamplings$remove("oml")
8587
} # nocov end
8688

87-
8889
leanify_package()

cran-comments.md

Lines changed: 3 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -2,21 +2,7 @@
22

33
None
44

5-
## R CMD check results
5+
## Comment
66

7-
There is only one note that informs about the maintainer change.
8-
9-
Maintainer: 'Sebastian Fischer <[email protected]>'
10-
11-
New maintainer:
12-
Sebastian Fischer <[email protected]>
13-
Old maintainer(s):
14-
Michel Lang <[email protected]>
15-
16-
17-
## Comments
18-
19-
This package uses a REST API and therefore:
20-
21-
* wraps examples in "\dontrun{...}"
22-
* disables tests relying on an existing server and internet connection on CRAN
7+
This release fixes the CRAN NOTE that arose because examples did not fail gracefully when the OpenML server
8+
was not available.

0 commit comments

Comments
 (0)