|
| 1 | +#' @title Upload data to OpenML |
| 2 | +#' |
| 3 | +#' @description |
| 4 | +#' Upload a dataset to OpenML. |
| 5 | +#' This can also be achieved through the [website](https://openml.org). |
| 6 | +#' |
| 7 | +#' @param data ([`data.frame()`])\cr |
| 8 | +#' The data to upload. |
| 9 | +#' @param name (`character(1)`)\cr |
| 10 | +#' The name of the dataset. |
| 11 | +#' @param desc (`character(1)`)\cr |
| 12 | +#' The description of the dataset. |
| 13 | +#' @param license (`character(1)`)\cr |
| 14 | +#' The license of the dataset |
| 15 | +#' @param default_target (`character(1)`)\cr |
| 16 | +#' The default target variable. |
| 17 | +#' @param citation (`character(1)`)\cr |
| 18 | +#' How to cite the dataset. |
| 19 | +#' @param original_data_url (character(1))\cr |
| 20 | +#' The URL of the original data set. |
| 21 | +#' @param paper_url (`character(1)`)\cr |
| 22 | +#' The URL of the paper describing the data set. |
| 23 | +#' @param row_identifier (`character(1)`)\cr |
| 24 | +#' Whether any of the columns is a row identifier. |
| 25 | +#' @param ignore_attribute (`character(1)`)\cr |
| 26 | +#' Which columns to ignore during modeling. |
| 27 | +#' @template param_test_server |
| 28 | +#' @template param_api_key |
| 29 | +#' |
| 30 | +#' @export |
| 31 | +publish_data = function(data, name, desc, license = NULL, default_target = NULL, citation = NULL, |
| 32 | + row_identifier = NULL, ignore_attribute = NULL, original_data_url = NULL, paper_url = NULL, |
| 33 | + test_server = test_server_default(), api_key = NULL) { |
| 34 | + require_namespaces(c("xml2", "httr")) |
| 35 | + assert_flag(test_server) |
| 36 | + if (is.null(api_key)) { |
| 37 | + api_key = get_api_key(get_server(test_server)) |
| 38 | + } else { |
| 39 | + assert_string(api_key) |
| 40 | + } |
| 41 | + assert_data_frame(data) |
| 42 | + assert_subset(unique(map_chr(data, function(x) class(x)[[1L]])), c("numeric", "integer", "factor", "character")) |
| 43 | + assert_string(name) |
| 44 | + assert_string(desc) |
| 45 | + assert_string(license, null.ok = TRUE) |
| 46 | + assert_string(default_target, null.ok = TRUE) |
| 47 | + assert_choice(default_target, colnames(data), null.ok = TRUE) |
| 48 | + assert_choice(row_identifier, colnames(data), null.ok = TRUE) |
| 49 | + assert_choice(ignore_attribute, colnames(data), null.ok = TRUE) |
| 50 | + assert_string(citation, null.ok = TRUE) |
| 51 | + assert_string(original_data_url, null.ok = TRUE) |
| 52 | + assert_string(paper_url, null.ok = TRUE) |
| 53 | + |
| 54 | + doc = xml2::xml_new_document() |
| 55 | + dat = xml2::xml_add_child(doc, "oml:data_set_description", "xmlns:oml" = "http://openml.org/openml") |
| 56 | + |
| 57 | + add = function(name, value) { |
| 58 | + if (!is.null(value)) { |
| 59 | + xml2::xml_add_child(.x = dat, .value = paste0("oml:", name), value) |
| 60 | + } |
| 61 | + } |
| 62 | + |
| 63 | + # Order matters! |
| 64 | + add("name", name) |
| 65 | + add("description", desc) |
| 66 | + add("format", "arff") |
| 67 | + add("licence", license) |
| 68 | + add("default_target_attribute", default_target) |
| 69 | + add("row_id_attribute", row_identifier) |
| 70 | + add("ignore_attribute", ignore_attribute) |
| 71 | + add("citation", citation) |
| 72 | + add("original_data_url", original_data_url) |
| 73 | + add("paper_url", paper_url) |
| 74 | + |
| 75 | + desc_path = tempfile(fileext = ".xml") |
| 76 | + withr::defer(unlink(desc_path)) |
| 77 | + xml2::write_xml(x = doc, file = desc_path) |
| 78 | + |
| 79 | + data_path = tempfile("arff") |
| 80 | + withr::defer(unlink(data_path)) |
| 81 | + write_arff(data, data_path) |
| 82 | + |
| 83 | + response = httr::POST( |
| 84 | + url = sprintf("%s/data", get_server(test_server)), |
| 85 | + body = list( |
| 86 | + description = httr::upload_file(desc_path), |
| 87 | + dataset = httr::upload_file(data_path) |
| 88 | + ), |
| 89 | + query = list(api_key = api_key) |
| 90 | + ) |
| 91 | + response_list = xml2::as_list(httr::content(response)) |
| 92 | + |
| 93 | + if (httr::http_error(response)) { |
| 94 | + warningf( |
| 95 | + paste(response_list$error$message, response_list$error$additional_information, collapse = "\n") |
| 96 | + ) |
| 97 | + return(response) |
| 98 | + } |
| 99 | + |
| 100 | + as.integer(response_list$upload_data_set$id[[1]]) |
| 101 | +} |
0 commit comments