mlr-org
diff --git a/‎.github/workflows/r-cmd-check-dev.yml‎
Lines changed: 0 additions & 58 deletions b/‎.github/workflows/r-cmd-check-dev.yml‎
Lines changed: 0 additions & 58 deletions
diff --git a/‎.github/workflows/r-cmd-check.yml‎
Lines changed: 2 additions & 0 deletions b/‎.github/workflows/r-cmd-check.yml‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎DESCRIPTION‎
Lines changed: 4 additions & 2 deletions b/‎DESCRIPTION‎
Lines changed: 4 additions & 2 deletions
diff --git a/‎NAMESPACE‎
Lines changed: 3 additions & 0 deletions b/‎NAMESPACE‎
Lines changed: 3 additions & 0 deletions
diff --git a/‎NEWS.md‎
Lines changed: 5 additions & 1 deletion b/‎NEWS.md‎
Lines changed: 5 additions & 1 deletion
diff --git a/‎R/defaults.R‎
Lines changed: 1 addition & 1 deletion b/‎R/defaults.R‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎R/publish_collection.R‎
Lines changed: 71 additions & 0 deletions b/‎R/publish_collection.R‎
Lines changed: 71 additions & 0 deletions
diff --git a/‎R/publish_data.R‎
Lines changed: 101 additions & 0 deletions b/‎R/publish_data.R‎
Lines changed: 101 additions & 0 deletions
diff --git a/‎R/publish_task.R‎
Lines changed: 83 additions & 0 deletions b/‎R/publish_task.R‎
Lines changed: 83 additions & 0 deletions
@@ -50,6 +50,8 @@ jobs:
           needs: check
 
       - uses: r-lib/actions/check-r-package@v2
+        env:
+          TESTOPENMLAPIKEY: ${{ secrets.TESTOPENMLAPIKEY }}
 
       - uses: mxschmitt/action-tmate@v3
         if: ${{ github.event_name == 'workflow_dispatch' && inputs.debug_enabled }}
 
@@ -10,7 +10,7 @@ Authors@R: c(
 Description: Provides an interface to 'OpenML.org' to list and download
     machine learning data, tasks and experiments. The 'OpenML' objects can
     be automatically converted to 'mlr3' objects.  For a more
-    sophisticated interface which also allows uploading to 'OpenML', see
+    sophisticated interface with more upload options, see
     the 'OpenML' package.
 License: LGPL-3
 URL: https://mlr3oml.mlr-org.com, https://github.com/mlr-org/mlr3oml
@@ -39,7 +39,9 @@ Suggests:
     mlr3db (>= 0.5.0),
     qs,
     RWeka,
-    testthat (>= 3.0.0)
+    testthat (>= 3.0.0),
+    xml2,
+    httr
 Config/testthat/edition: 3
 Encoding: UTF-8
 NeedsCompilation: yes
 
@@ -32,6 +32,9 @@ export(odt)
 export(oflw)
 export(orn)
 export(otsk)
+export(publish_collection)
+export(publish_data)
+export(publish_task)
 export(read_arff)
 export(write_arff)
 import(checkmate)
 
@@ -1,7 +1,11 @@
-# mlr3oml 0.8.0-9000
+# mlr3oml 0.9.0
 
 * Fix: Parquet datasets now work where columns simultaneously have to be renamed
 and converted.
+* Added upload functions:
+  * `publish_data` to upload a dataset to OpenML
+  * `publish_task` to create a task on OpenML
+  * `publish_collection` to create a collection on OpenML
 
 # mlr3oml 0.8.0
 
 
@@ -1,4 +1,4 @@
 parquet_default = function() getOption("mlr3oml.parquet", FALSE)
 test_server_default = function() getOption("mlr3oml.test_server", FALSE)
-cache_default = function() getOption("mlr3oml.cache", FALSE)
 limit_default = function() getOption("mlr3oml.limit", 5000L)
+
@@ -0,0 +1,71 @@
+#' @title Publish a Collection to OpenML
+#'
+#' @description
+#' Publish a collection to OpenML.
+#' This can also be achieved through the [website](https://openml.org).
+#'
+#' @param ids (`integer()`)\cr
+#'   The IDs to include in the collection.
+#'   Depending on the main entity type, these can be task or run IDs.
+#' @param name (`character(1)`)\cr
+#'   The name for the collection.
+#' @param desc (`character(1)`)\cr
+#'   The description of the collection.
+#' @param main_entity_type (`character(1)`)\cr
+#'   The main entity type of the collection. Can be either `"task"` or `"run"`.
+#' @param alias (`character(1)`)\cr
+#'   The alias for the collection.
+#' @template param_test_server
+#' @template param_api_key
+#'
+#' @return (`integer(1)`) The ID of the created collection.
+#'   If the server answers with an HTTP error, a warning is raised and the `httr` response object is
+#'   returned instead.
+#'
+#' @export
+publish_collection = function(ids, name, desc, main_entity_type = "task", alias = NULL, api_key = NULL,
+  test_server = test_server_default()) {
+  require_namespaces(c("xml2", "httr"))
+  assert_flag(test_server)
+  if (is.null(api_key)) {
+    api_key = get_api_key(get_server(test_server))
+  } else {
+    assert_string(api_key)
+  }
+  # Coerce to integer: IDs passed as doubles (e.g. 100000) would otherwise be
+  # written into the XML in scientific notation ("1e+05").
+  ids = assert_integerish(ids, lower = 1L, any.missing = FALSE, coerce = TRUE)
+  assert_choice(main_entity_type, c("task", "run"))
+  assert_string(name)
+  assert_string(desc)
+  assert_string(alias, null.ok = TRUE)
+
+  doc = xml2::xml_new_document()
+  collection = xml2::xml_add_child(doc, "oml:study", "xmlns:oml" = "http://openml.org/openml")
+
+  # Order matters!
+  if (!is.null(alias)) xml2::xml_add_child(.x = collection, .value = "oml:alias", alias)
+  xml2::xml_add_child(.x = collection, .value = "oml:main_entity_type", main_entity_type)
+  xml2::xml_add_child(.x = collection, .value = "oml:name", name)
+  xml2::xml_add_child(.x = collection, .value = "oml:description", desc)
+
+  # The container element is "oml:tasks" / "oml:runs", its children "oml:task_id" / "oml:run_id".
+  objects = xml2::xml_add_child(collection, .value = sprintf("oml:%ss", main_entity_type))
+  for (id in ids) {
+    xml2::xml_add_child(.x = objects, .value = sprintf("oml:%s_id", main_entity_type), id)
+  }
+
+  # The description is uploaded as a file, so it is written to a temporary
+  # location that is removed when the function exits.
+  desc_path = tempfile(fileext = ".xml")
+  withr::defer(unlink(desc_path))
+  xml2::write_xml(x = doc, file = desc_path)
+
+  response = httr::POST(
+    url = sprintf("%s/study", get_server(test_server)),
+    body = list(
+      description = httr::upload_file(desc_path)
+    ),
+    query = list(api_key = api_key)
+  )
+
+  response_list = xml2::as_list(httr::content(response))
+  if (httr::http_error(response)) {
+    # Pass the server text as an argument and not as the format string,
+    # because it may contain "%" characters.
+    warningf("%s",
+      paste(response_list$error$message, response_list$error$additional_information, collapse = "\n")
+    )
+    return(response)
+  }
+  as.integer(response_list$study_upload$id[[1L]])
+}
@@ -0,0 +1,101 @@
+#' @title Upload data to OpenML
+#'
+#' @description
+#' Upload a dataset to OpenML.
+#' This can also be achieved through the [website](https://openml.org).
+#'
+#' @param data ([`data.frame()`])\cr
+#'   The data to upload.
+#'   Only columns of class `numeric`, `integer`, `factor` and `character` are accepted.
+#' @param name (`character(1)`)\cr
+#'   The name of the dataset.
+#' @param desc (`character(1)`)\cr
+#'   The description of the dataset.
+#' @param license (`character(1)`)\cr
+#'   The license of the dataset.
+#' @param default_target (`character(1)`)\cr
+#'   The default target variable. Must be a column name of `data`.
+#' @param citation (`character(1)`)\cr
+#'   How to cite the dataset.
+#' @param row_identifier (`character(1)`)\cr
+#'   The name of the column that is a row identifier. Must be a column name of `data`.
+#' @param ignore_attribute (`character(1)`)\cr
+#'   The name of the column to ignore during modeling. Must be a column name of `data`.
+#' @param original_data_url (`character(1)`)\cr
+#'   The URL of the original data set.
+#' @param paper_url (`character(1)`)\cr
+#'   The URL of the paper describing the data set.
+#' @template param_test_server
+#' @template param_api_key
+#'
+#' @return (`integer(1)`) The ID of the uploaded dataset.
+#'   If the server answers with an HTTP error, a warning is raised and the `httr` response object is
+#'   returned instead.
+#'
+#' @export
+publish_data = function(data, name, desc, license = NULL, default_target = NULL, citation = NULL,
+  row_identifier = NULL, ignore_attribute = NULL, original_data_url = NULL, paper_url = NULL,
+  test_server = test_server_default(), api_key = NULL) {
+  require_namespaces(c("xml2", "httr"))
+  assert_flag(test_server)
+  if (is.null(api_key)) {
+    api_key = get_api_key(get_server(test_server))
+  } else {
+    assert_string(api_key)
+  }
+  assert_data_frame(data)
+  # Only the first class of each column is inspected.
+  col_classes = unique(map_chr(data, function(x) class(x)[[1L]]))
+  assert_subset(col_classes, c("numeric", "integer", "factor", "character"))
+  assert_string(name)
+  assert_string(desc)
+  assert_string(license, null.ok = TRUE)
+  # assert_choice() already implies a single non-missing string.
+  assert_choice(default_target, colnames(data), null.ok = TRUE)
+  assert_choice(row_identifier, colnames(data), null.ok = TRUE)
+  assert_choice(ignore_attribute, colnames(data), null.ok = TRUE)
+  assert_string(citation, null.ok = TRUE)
+  assert_string(original_data_url, null.ok = TRUE)
+  assert_string(paper_url, null.ok = TRUE)
+
+  doc = xml2::xml_new_document()
+  dat = xml2::xml_add_child(doc, "oml:data_set_description", "xmlns:oml" = "http://openml.org/openml")
+
+  # Appends <oml:{name}>{value}</oml:{name}>; optional fields that are NULL are skipped.
+  add = function(name, value) {
+    if (!is.null(value)) {
+      xml2::xml_add_child(.x = dat, .value = paste0("oml:", name), value)
+    }
+  }
+
+  # Order matters!
+  add("name", name)
+  add("description", desc)
+  add("format", "arff")
+  # The XML element is spelled "licence", unlike the R argument `license`
+  # (presumably the spelling the OpenML schema expects).
+  add("licence", license)
+  add("default_target_attribute", default_target)
+  add("row_id_attribute", row_identifier)
+  add("ignore_attribute", ignore_attribute)
+  add("citation", citation)
+  add("original_data_url", original_data_url)
+  add("paper_url", paper_url)
+
+  # Both the description and the data are uploaded as files; the temporary
+  # files are removed when the function exits.
+  desc_path = tempfile(fileext = ".xml")
+  withr::defer(unlink(desc_path))
+  xml2::write_xml(x = doc, file = desc_path)
+
+  data_path = tempfile("arff")
+  withr::defer(unlink(data_path))
+  write_arff(data, data_path)
+
+  response = httr::POST(
+    url = sprintf("%s/data", get_server(test_server)),
+    body = list(
+      description = httr::upload_file(desc_path),
+      dataset = httr::upload_file(data_path)
+    ),
+    query = list(api_key = api_key)
+  )
+  response_list = xml2::as_list(httr::content(response))
+
+  if (httr::http_error(response)) {
+    # Pass the server text as an argument and not as the format string,
+    # because it may contain "%" characters.
+    warningf("%s",
+      paste(response_list$error$message, response_list$error$additional_information, collapse = "\n")
+    )
+    return(response)
+  }
+
+  as.integer(response_list$upload_data_set$id[[1L]])
+}
@@ -0,0 +1,83 @@
+#' @title Publish a task on OpenML
+#'
+#' @description
+#' Publish a task on OpenML.
+#' This can also be achieved through the [website](https://openml.org).
+#'
+#' @param id (`integer(1)`)\cr
+#'   The dataset id.
+#' @param type (`character(1)` or `integer(1)`)\cr
+#'   Can either be `"classif"` or `"regr"` or an integer indicating the task type.
+#' @param estimation_procedure (`integer(1)`)\cr
+#'   The id of the estimation procedure.
+#' @param target (`character(1)`)\cr
+#'   The target variable.
+#' @template param_api_key
+#' @template param_test_server
+#'
+#' @return (`integer(1)`) The ID of the created task.
+#'   If the server reports that the task already exists (error code 614), a message is printed and the
+#'   ID of the existing task is returned.
+#'   For any other HTTP error, a warning is raised and the `httr` response object is returned instead.
+#'
+#' @export
+publish_task = function(id, type, estimation_procedure, target, api_key = NULL,
+  test_server = test_server_default()) {
+  require_namespaces(c("xml2", "httr"))
+  assert_flag(test_server)
+  if (is.null(api_key)) {
+    api_key = get_api_key(get_server(test_server))
+  } else {
+    assert_string(api_key)
+  }
+  # All numeric inputs are coerced to integer: values passed as doubles
+  # (e.g. 100000) would otherwise be written into the XML as "1e+05".
+  id = assert_int(id, lower = 1L, coerce = TRUE)
+  if (test_string(type)) {
+    type = switch(type,
+      regr = 2L,
+      classif = 1L,
+      stopf("Invalid type '%s'.", type)
+    )
+  } else {
+    type = assert_int(type, lower = 1L, coerce = TRUE)
+  }
+  assert_string(target)
+  estimation_procedure = assert_int(estimation_procedure, coerce = TRUE)
+
+  doc = xml2::xml_new_document()
+  task = xml2::xml_add_child(doc, "oml:task_inputs", "xmlns:oml" = "http://openml.org/openml")
+
+  # Appends <oml:input name="{name}">{value}</oml:input> to the task description.
+  add = function(name, value) {
+    xml2::xml_add_child(.x = task, "oml:input", name = name, value)
+  }
+
+  xml2::xml_add_child(task, "oml:task_type_id", type)
+  add("source_data", id)
+  add("target_feature", target)
+  add("estimation_procedure", estimation_procedure)
+
+  # The description is uploaded as a file, so it is written to a temporary
+  # location that is removed when the function exits.
+  desc_path = tempfile(fileext = ".xml")
+  withr::defer(unlink(desc_path))
+  xml2::write_xml(x = doc, file = desc_path)
+
+  response = httr::POST(
+    url = sprintf("%s/task", get_server(test_server)),
+    body = list(
+      description = httr::upload_file(desc_path)
+    ),
+    query = list(api_key = api_key)
+  )
+
+  response_list = xml2::as_list(httr::content(response))
+  if (httr::http_error(response)) {
+    if (isTRUE(response_list$error$code[[1L]] == "614")) { # Task already exists.
+      info = response_list$error$additional_information[[1L]]
+      # NOTE(review): the additional information presumably looks like
+      # "matched id(s): [<id>]" - confirm against the server. The first run of
+      # digits is taken instead of relying on fixed character offsets.
+      existing_id = as.integer(regmatches(info, regexpr("[0-9]+", info)))
+      if (length(existing_id) != 1L) existing_id = NA_integer_
+      messagef("Task already exists with id %s.", existing_id)
+      return(existing_id)
+    } else {
+      # Pass the server text as an argument and not as the format string,
+      # because it may contain "%" characters.
+      warningf("%s",
+        paste(response_list$error$message, response_list$error$additional_information, collapse = "\n")
+      )
+      return(response)
+    }
+  }
+
+  as.integer(response_list$upload_task$id[[1L]])
+}
+