diff --git a/.Rbuildignore b/.Rbuildignore index 70a566d2..b683a6fd 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -1,21 +1,36 @@ -^renv$ -^renv\.lock$ -^README\.Rmd$ -^README\.html$ -^LICENSE$ -.ignore -.editorconfig -.gitignore -^.*\.Rproj$ +^\.agents$ +^\.ccache$ +^\.clangd$ +^\.claude$ +^\.cspell$ +^\.cursor$ +^\.editorconfig$ +^\.git$ +^\.github$ +^\.gitignore$ +^\.ignore$ +^\.lintr$ ^\.Rproj\.user$ -^man-roxygen$ -^pkgdown$ ^\.vscode$ -^\.lintr$ -^\.github$ -^\.ccache$ -^docs$ -^revdep$ +^.*\.Rproj$ +^AGENTS.md$ +^air.toml$ +^attic$ +^attic_local$ +^CITATION.cff$ +^CLAUDE.md$ +^cspell.json$ +^CONTRIBUTING.md$ ^cran-comments\.md$ ^CRAN-SUBMISSION$ -^.claude$ +^docs$ +^inst/extdata/.+\.R$ +^LICENSE$ +^local_attic$ +^man-roxygen$ +^paper$ +^pkgdown$ +^README\.Rmd$ +^README.html$ +^revdep$ +^tests/testthat/_object_snapshots$ diff --git a/.agents/mlr3.md b/.agents/mlr3.md new file mode 100644 index 00000000..2fbf2b98 --- /dev/null +++ b/.agents/mlr3.md @@ -0,0 +1,132 @@ +### Architecture + +This package uses R6 classes organized around a dictionary registry pattern. + +#### Class hierarchy + +- `Learner` > `LearnerClassif` / `LearnerRegr` > concrete (e.g., `LearnerClassifRpart`) +- `Task` > `TaskSupervised` > `TaskClassif` / `TaskRegr` +- `Measure` > `MeasureClassif` / `MeasureRegr` / `MeasureSimilarity` +- `Resampling` > `ResamplingCV`, `ResamplingHoldout`, etc. +- `DataBackend` > `DataBackendDataTable`, `DataBackendCbind`, etc. +- `Prediction` > `PredictionClassif` / `PredictionRegr` + +#### File naming + +- One R6 class per file, named exactly as the class: `LearnerClassifRpart.R` contains `LearnerClassifRpart`. +- Named dataset tasks use an underscore: `TaskClassif_iris.R`. +- Dictionary files: `mlr_learners.R`, `mlr_tasks.R`, etc. 
+ +#### Dictionary system + +Objects are registered in dictionaries and accessed via sugar functions: + +| Dictionary | Sugar | Example | +|-----------------------|----------------------|----------------------------------| +| `mlr_learners` | `lrn()` / `lrns()` | `lrn("classif.rpart", cp = 0.1)` | +| `mlr_tasks` | `tsk()` / `tsks()` | `tsk("iris")` | +| `mlr_measures` | `msr()` / `msrs()` | `msr("classif.ce")` | +| `mlr_resamplings` | `rsmp()` / `rsmps()` | `rsmp("cv", folds = 5)` | +| `mlr_task_generators` | `tgen()` / `tgens()` | `tgen("friedman1")` | + +Every new object **must** be registered at the bottom of its file: + +```r +#' @include mlr_learners.R +mlr_learners$add("classif.rpart", function() LearnerClassifRpart$new()) +``` + +#### Collation order + +Derived classes must declare `#' @include ParentClass.R` in their roxygen header. This controls the `Collate:` field in DESCRIPTION so base classes load before derived classes. + +#### Hyperparameters (paradox) + +Parameters are defined with `paradox::ps()` and must be tagged `"train"` or `"predict"`: + +```r +ps = ps( + cp = p_dbl(0, 1, default = 0.01, tags = "train"), + keep_model = p_lgl(default = FALSE, tags = "train") +) +``` + +In `.train()` / `.predict()`, retrieve values with `self$param_set$get_values(tags = "train")`. + +There is a distinction between `default` and `init` values: +- `default` describes the behavior when a parameter is not set at all (i.e., the upstream function's default). It is informational only. +- `init` (via `p_xxx(init = ...)`) sets the parameter to a value upon construction. Use this when the mlr3 default should differ from the upstream default. +- A parameter tagged `"required"` causes an error if not set. A required parameter cannot have a `default` (that would be contradictory). +- paradox does type-checking and range-checking automatically; `get_values()` checks that required params are present. Additional feasibility checks are rarely needed. 
+ +#### Core dependencies + +`data.table`, `checkmate`, `mlr3misc`, `paradox`, `R6`, and `cli` are imported wholesale. Use their functions directly without `::`. Key mlr3misc utilities: `map()`, `map_chr()`, `invoke()`, `calculate_hash()`, `str_collapse()`, `%nin%`, `%??%`. + +#### Error handling + +Use structured error/warning functions from mlr3misc: `error_config()`, `error_input()`, `error_learner_train()`, `error_learner_predict()`, `warning_config()`, `warning_input()`. These support `sprintf`-style formatting. + +#### Reflections + +`mlr_reflections` is an environment that stores allowed types, properties, and roles. Extension packages modify it to register new task types. Check it when adding new properties or feature types. + +### Testing + +- Tests for `R/{name}.R` go in `tests/testthat/test_{name}.R`. +- All new code should have an accompanying test. +- If there are existing tests, place new tests next to similar existing tests. +- Strive to keep your tests minimal with few comments. +- The full test suite takes a long time. Only run tests relevant to your changes with `devtools::test(filter = '^{name}')`. +- New learners must pass `run_autotest()` and `run_paramtest()`. +- Use shared assertion helpers: `expect_learner()`, `expect_task()`, `expect_resampling()`, `expect_measure()`, `expect_prediction()`. +- Shared test infrastructure lives in `inst/testthat/` and is sourced by extension packages too. + +### Documentation + +- Every user-facing function should be exported and have roxygen2 documentation. +- Wrap roxygen comments at 120 characters. +- Write one sentence per line. +- If a sentence exceeds the limit, break at a comma, "and", "or", "but", or other appropriate point. +- Internal functions should not have roxygen documentation. +- Whenever you add a new (non-internal) documentation topic, also add the topic to `_pkgdown.yml`. +- Always re-document the package after changing a roxygen2 comment. 
+- Use `pkgdown::check_pkgdown()` to check that all topics are included in the reference index. +- Don’t hand-edit generated artifacts: `man/`, or `NAMESPACE`. +- Roxygen templates live in `man-roxygen/` (e.g., `@template learner`, `@template param_id`). Use `@templateVar` to pass values. +- Bibliographic references go in `R/bibentries.R` and are cited with `` `r format_bib("key")` ``. +- Man page names for dictionary objects follow `mlr_learners_classif.rpart`, `mlr_tasks_iris`, etc. +- When you write examples, make sure they work. + +### `NEWS.md` + +- Every user-facing change should be given a bullet in `NEWS.md`. Do not add bullets for small documentation changes or internal refactorings. +- Each bullet should briefly describe the change to the end user and mention the related issue in parentheses. +- A bullet can consist of multiple sentences but should not contain any new lines (i.e. DO NOT line wrap). +- If the change is related to a function, put the name of the function early in the bullet. +- Order bullets alphabetically by function name. Put all bullets that don't mention function names at the beginning. + +### GitHub + +- If you use `gh` to retrieve information about an issue, always use `--comments` to read all the comments. + +### Writing + +- Use sentence case for headings. +- Use US English. + +### Proofreading + +If the user asks you to proofread a file, act as an expert proofreader and editor with a deep understanding of clear, engaging, and well-structured writing. + +Work paragraph by paragraph, always starting by making a TODO list that includes individual items for each top-level heading. + +Fix spelling, grammar, and other minor problems without asking the user. Label any unclear, confusing, or ambiguous sentences with a FIXME comment. + +Only report what you have changed. + +### References + +- [mlr3book](https://mlr3book.mlr-org.com/) — comprehensive guide to the mlr3 ecosystem. 
+- [mlr3misc](https://github.com/mlr-org/mlr3misc) — helper functions used throughout the codebase. +- [paradox](https://github.com/mlr-org/paradox) — hyperparameter/configuration space definitions. diff --git a/.claude/settings.json b/.claude/settings.json new file mode 100644 index 00000000..7e60eaa6 --- /dev/null +++ b/.claude/settings.json @@ -0,0 +1,7 @@ +{ + "permissions": { + "allow": [ + "Bash(gh run view:*)" + ] + } +} diff --git a/.cspell/project-words.txt b/.cspell/project-words.txt new file mode 100644 index 00000000..78604a27 --- /dev/null +++ b/.cspell/project-words.txt @@ -0,0 +1,2 @@ +# Project-specific words — commit and share with the team. +# Add words here (or via "Add to project dictionary" in VS Code / Cursor). diff --git a/.editorconfig b/.editorconfig index f5ef1a53..5aa8919d 100644 --- a/.editorconfig +++ b/.editorconfig @@ -1,21 +1,11 @@ # See http://editorconfig.org root = true +# settings for all files [*] -charset = utf-8 -end_of_line = lf -insert_final_newline = true -indent_style = space -trim_trailing_whitespace = true - -[*.{r,R,md,Rmd}] -indent_size = 2 - -[*.{c,h}] -indent_size = 4 - -[*.{cpp,hpp}] -indent_size = 4 - -[{NEWS.md,DESCRIPTION,LICENSE}] -max_line_length = 80 +charset = utf-8 # Ensure all files are saved in UTF-8 encoding +end_of_line = lf # Use LF line endings (Unix style) +indent_style = space # Use spaces for indentation +indent_size = 2 # always use 2 spaces for indentation, R, C, python, etc. 
+max_line_length = 120 # max line length +trim_trailing_whitespace = true # Remove trailing whitespace diff --git a/.gitignore b/.gitignore index 848cfd50..22393818 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,6 @@ # File created using '.gitignore Generator' for Visual Studio Code: https://bit.ly/vscode-gig -# Created by https://www.toptal.com/developers/gitignore/api/windows,visualstudiocode,r,macos,linux -# Edit at https://www.toptal.com/developers/gitignore?templates=windows,visualstudiocode,r,macos,linux +# Created by https://www.toptal.com/developers/gitignore/api/windows,visualstudiocode,macos,linux,r +# Edit at https://www.toptal.com/developers/gitignore?templates=windows,visualstudiocode,macos,linux,r ### Linux ### *~ @@ -150,13 +150,17 @@ $RECYCLE.BIN/ # Windows shortcuts *.lnk -# End of https://www.toptal.com/developers/gitignore/api/windows,visualstudiocode,r,macos,linux +# End of https://www.toptal.com/developers/gitignore/api/windows,visualstudiocode,macos,linux,r # Custom rules (everything added below won't be overriden by 'Generate .gitignore File' if you use 'Update' option) # R .Rprofile README.html +src/*.o +src/*.so +src/*.dll +.clangd # CRAN cran-comments.md @@ -170,10 +174,9 @@ docs/ renv/ renv.lock -# vscode -.vscode - # revdep revdep/ -check/* -.claude/ + +# AI +.claude/settings.local.json +CLAUDE.md \ No newline at end of file diff --git a/.lintr b/.lintr index 87ed0579..7ee15a4d 100644 --- a/.lintr +++ b/.lintr @@ -1,9 +1,13 @@ linters: linters_with_defaults( - # lintr defaults: https://github.com/jimhester/lintr#available-linters + # lintr defaults: https://lintr.r-lib.org/reference/default_linters.html # the following setup changes/removes certain linters assignment_linter = NULL, # do not force using <- for assignments - object_name_linter = object_name_linter(c("snake_case", "CamelCase")), # only allow snake case and camel case object names + object_name_linter = object_name_linter(c("snake_case", "CamelCase", "SNAKE_CASE")), # 
only allow snake case, camel case, and upper snake case object names cyclocomp_linter = NULL, # do not check function complexity commented_code_linter = NULL, # allow code in comments - line_length_linter = line_length_linter(2000) + line_length_linter = line_length_linter(120L), # same as .editorconfig + # use indent=2 as in .editorconfig; also use block-aligned continuation with 2 spaces, + # not “align under first argument” style. + indentation_linter = indentation_linter(indent = 2L, hanging_indent_style = "never") ) + diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 00000000..e06676b3 --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,34 @@ +{ + + // ********** settings git / gitlens ********** + + // disable "blame hover", to remove visual noise + "gitlens.currentLine.enabled": false, + + // ********** settings for cspell ************* + // show spelling errors as hints (not in problems panel) + "cSpell.diagnosticLevel": "Hint", + // file type whitelist, useGitignore, and languageSettings live in cspell.json + + // ********** settings for R ************* + + // format on save so we don't have to manually format, use AIR for formatting + "[r]": { + "editor.formatOnSave": true, + "editor.defaultFormatter": "Posit.air-vscode", + // disable hover for R, to remove visual noise + "editor.hover.enabled": false + }, + + // ********** settings for C / C++ ********** + + "[c]": { + "editor.formatOnSave": true, + "editor.defaultFormatter": "llvm-vs-code-extensions.vscode-clangd" + }, + "[cpp]": { + "editor.formatOnSave": true, + "editor.defaultFormatter": "llvm-vs-code-extensions.vscode-clangd" + } +} + diff --git a/AGENTS.md b/AGENTS.md new file mode 100644 index 00000000..77945b9e --- /dev/null +++ b/AGENTS.md @@ -0,0 +1,109 @@ +# R package development + +## Key commands + +``` +# To run code +Rscript -e "devtools::load_all(); code" + +# To run all tests +Rscript -e "devtools::test()" + +# To run all tests for files starting with {name} +Rscript -e 
"devtools::test(filter = '^{name}')" + +# To run all tests for R/{name}.R +Rscript -e "devtools::test_active_file('R/{name}.R')" + +# To run a single test "blah" for R/{name}.R +Rscript -e "devtools::test_active_file('R/{name}.R', desc = 'blah')" + +# To redocument the package +Rscript -e "devtools::document()" + +# To check pkgdown documentation +Rscript -e "pkgdown::check_pkgdown()" + +# To check the package with R CMD check +Rscript -e "devtools::check()" +``` + +## Code Style + +* Always use `=` for assignment, never `<-`. +* 2-space indentation, 120-character line limit. +* `snake_case` for functions and variables, `CamelCase` for R6 classes. +* When calling a function from imported package `foo` do not write `foo::bar()` but `bar()` +* Double quotes for strings, explicit `TRUE`/`FALSE` (never `T`/`F`), explicit `1L` for integers. +* Use implicit return values for functions. +* Prefer `result = if (...) ... else ...` over `if (...) { result = ... } else { result = ... }` + when the only difference between branches is the assigned value. +* User-facing API (exported functions, public R6 methods) must have `checkmate` `assert_*()` argument checks. + For internal code, match the existing level of defensiveness. +* Use these mlr3misc utilities when appropriate: + `map()`, `map_chr()`, `invoke()`, `calculate_hash()`, `str_collapse()`, `%nin%`, `%??%`. +* Before implementing something, read similar existing files first to match the established patterns. +* Always use `#nolint next` to disable linters for the next line instead of `# nolint` on the same line. + +## File structure and naming + +* Name the file as the most important contained function / class +* No whitespaces, no special characters in filenames +* Usually one large function / class per file, but adding multiple smaller helpers is ok + +## Collation order + +* Derived classes must declare `#' @include ParentClass.R` in their roxygen header. 
+ This controls the `Collate:` field in DESCRIPTION so base classes load before derived classes. + +## Core dependencies +* Use `checkmate` for arg-checks +* Use `data.table` for efficient table structures +* For OOP-style use `R6` +* Use `cli` to format messages, warnings, errors, and prints + +## Testing + +* Tests for `R/{name}.R` go in `tests/testthat/test_{name}.R`. +* All new code should have an accompanying test. +* If there are existing tests, place new tests next to similar existing tests. +* Strive to keep your tests minimal with few comments. +* The full test suite takes a long time. Only run tests relevant to your changes with `devtools::test(filter = '^{name}')`. + +## Documentation + +- Every user-facing function should be exported and have roxygen2 documentation. +- Wrap roxygen comments at 120 characters. +- Write one sentence per line. +- If a sentence exceeds the limit, break at a comma, "and", "or", "but", or other appropriate point. +- Internal functions should not have roxygen documentation. +- Always re-document the package after changing a roxygen2 comment. +- Don’t hand-edit generated artifacts: `man/`, or `NAMESPACE`. +- Roxygen templates live in `man-roxygen/`. +- Bibliographic references go in `R/bibentries.R` and are cited with `` `r format_bib("key")` ``. + +## `NEWS.md` + +- Every user-facing change should be given a bullet in `NEWS.md`. + Do not add bullets for small documentation changes or internal refactorings. +- Each bullet should briefly describe the change to the end user and mention the related issue in parentheses. +- A bullet can consist of multiple sentences but should not contain any new lines (i.e. DO NOT line wrap). +- If the change is related to a function, put the name of the function early in the bullet. +- Order bullets alphabetically by function name. Put all bullets that don't mention function names at the beginning. 
+ +## GitHub + +- If you use `gh` to retrieve information about an issue, always use `--comments` to read all the comments. + +## Natural Language + +- The following applies to all natural language text, so docs, comments, NEWS, etc., but not code +- Use American English +- Use the Oxford comma +- Do not capitalize normal nouns or method names. "Bayesian" is capitalized, "random forest" is not. +- Use cspell to check for typos, and add needed words to .cspell/project-words.txt if reasonable + +## Further agents files +- Read and respect all files in the `.agents` folder + + diff --git a/R/BenchmarkResult.R b/R/BenchmarkResult.R index e1bfb4df..ce5a099f 100644 --- a/R/BenchmarkResult.R +++ b/R/BenchmarkResult.R @@ -5,7 +5,8 @@ #' The argument `type` controls what kind of plot is drawn. #' Possible choices are: #' -#' * `"boxplot"` (default): Boxplots of performance measures, one box per [mlr3::Learner] and one facet per [mlr3::Task]. +#' * `"boxplot"` (default): Boxplots of performance measures, +#' one box per [mlr3::Learner] and one facet per [mlr3::Task]. #' * `"roc"`: ROC curve (1 - specificity on x, sensitivity on y). #' The [mlr3::BenchmarkResult] may only have a single [mlr3::Task] and a single [mlr3::Resampling]. #' Note that you can subset any [mlr3::BenchmarkResult] with its `$filter()` method (see examples). 
@@ -70,9 +71,9 @@ autoplot.BenchmarkResult = function(object, type = "boxplot", measure = NULL, th geom_errorbar(aes(ymin = .data[[paste0(mid, ".lower")]], ymax = .data[[paste0(mid, ".upper")]]), width = 0.2) + facet_wrap(vars(task_id), scales = "free_y") + labs( - title = sprintf("Confidence Intervals for alpha = %s", measure$param_set$values$alpha), - x = "Learner", - y = paste0(measure$measure$id) + title = sprintf("Confidence Intervals for alpha = %s", measure$param_set$values$alpha), + x = "Learner", + y = paste0(measure$measure$id) ) + theme + theme( @@ -89,15 +90,20 @@ autoplot.BenchmarkResult = function(object, type = "boxplot", measure = NULL, th learner_labels = learner_label_map$learner_id names(learner_labels) = learner_label_map$nr - switch(type, + switch( + type, "boxplot" = { - ggplot(tab, + ggplot( + tab, mapping = aes( x = .data$nr, - y = .data[[measure_id]])) + + y = .data[[measure_id]] + ) + ) + geom_boxplot( mapping = aes(fill = .data[["learner_id"]]), - show.legend = FALSE) + + show.legend = FALSE + ) + scale_x_discrete(labels = learner_labels) + # we need "free_x" to drop empty learners for certain tasks - because we apply over .data$nr facet_wrap(vars(.data$task_id), scales = "free_x") + @@ -139,7 +145,8 @@ plot.BenchmarkResult = function(x, ...) { } #' @export -fortify.BenchmarkResult = function(model, data = NULL, measure = NULL, ...) { # nolint +#nolint next +fortify.BenchmarkResult = function(model, data = NULL, measure = NULL, ...) { task = model$tasks$task[[1L]] measure = mlr3::assert_measure(mlr3::as_measure(measure, task_type = task$task_type), task = task) model$score(measures = measure)[, c("nr", "task_id", "learner_id", "resampling_id", measure$id), with = FALSE] diff --git a/R/EnsembleFSResult.R b/R/EnsembleFSResult.R index 8d32045f..9cf47346 100644 --- a/R/EnsembleFSResult.R +++ b/R/EnsembleFSResult.R @@ -82,21 +82,24 @@ autoplot.EnsembleFSResult = function( stability_args = NULL, theme = theme_minimal(), ... 
- ) { +) { assert_choice(type, choices = c("pareto", "performance", "n_features", "stability"), null.ok = FALSE) assert_choice(pareto_front, choices = c("stepwise", "estimated", "none")) result = object$result measure = object$measure # get active measure - measure_id = ifelse(object$active_measure == "inner", - sprintf("%s_inner", measure$id), - measure$id) + measure_id = ifelse(object$active_measure == "inner", sprintf("%s_inner", measure$id), measure$id) - switch(type, + switch( + type, "pareto" = { - p = ggplot(result, mapping = aes( - x = .data[["n_features"]], - y = .data[[measure_id]], - color = .data[["learner_id"]])) + + p = ggplot( + result, + mapping = aes( + x = .data[["n_features"]], + y = .data[[measure_id]], + color = .data[["learner_id"]] + ) + ) + geom_point() + scale_color_viridis_d("Learner ID", end = 0.8, alpha = 0.8) + labs(x = "Number of Features", y = measure_id) + @@ -106,27 +109,42 @@ autoplot.EnsembleFSResult = function( pf = object$pareto_front(type = "empirical") pf_step = stepwise_pf(pf) p = p + - geom_line(data = pf_step, mapping = aes( - x = .data[["n_features"]], - y = .data[[measure_id]]), - color = "black", linewidth = 0.7) + geom_line( + data = pf_step, + mapping = aes( + x = .data[["n_features"]], + y = .data[[measure_id]] + ), + color = "black", + linewidth = 0.7 + ) } else if (pareto_front == "estimated") { pfe = object$pareto_front(type = "estimated") p = p + - geom_line(data = pfe, mapping = aes( - x = .data[["n_features"]], - y = .data[[measure_id]]), - color = "black", linetype = "dashed", linewidth = 0.7) + geom_line( + data = pfe, + mapping = aes( + x = .data[["n_features"]], + y = .data[[measure_id]] + ), + color = "black", + linetype = "dashed", + linewidth = 0.7 + ) } p }, "performance" = { - ggplot(result, aes( - x = .data[["learner_id"]], - y = .data[[measure_id]], - fill = .data[["learner_id"]])) + + ggplot( + result, + aes( + x = .data[["learner_id"]], + y = .data[[measure_id]], + fill = .data[["learner_id"]] + ) + 
) + geom_boxplot(show.legend = FALSE) + scale_fill_viridis_d(end = 0.8, alpha = 0.8) + labs(y = measure_id) + @@ -135,10 +153,14 @@ autoplot.EnsembleFSResult = function( }, "n_features" = { - ggplot(result, aes( - x = .data[["learner_id"]], - y = .data[["n_features"]], - fill = .data[["learner_id"]]))+ + ggplot( + result, + aes( + x = .data[["learner_id"]], + y = .data[["n_features"]], + fill = .data[["learner_id"]] + ) + ) + geom_boxplot(show.legend = FALSE) + scale_fill_viridis_d(end = 0.8, alpha = 0.8) + labs(y = "Number of Features") + @@ -152,13 +174,18 @@ autoplot.EnsembleFSResult = function( stability_measure = stability_measure, stability_args = stability_args, global = FALSE, - reset_cache = FALSE) + reset_cache = FALSE + ) data = data.table(learner_id = names(stab_res), value = stab_res) - ggplot(data, mapping = aes( - x = .data[["learner_id"]], - y = .data[["value"]], - fill = .data[["learner_id"]])) + + ggplot( + data, + mapping = aes( + x = .data[["learner_id"]], + y = .data[["value"]], + fill = .data[["learner_id"]] + ) + ) + geom_bar(stat = "identity", alpha = 0.8, show.legend = FALSE) + scale_fill_viridis_d(end = 0.8, alpha = 0.8) + labs(y = stability_measure) + @@ -186,11 +213,11 @@ stepwise_pf = function(pf) { # add intermediate point if applicable if (i < nrow(pf)) { - ok = pf[["n_features"]][i+1] > pf[["n_features"]][i] + ok = pf[["n_features"]][i + 1] > pf[["n_features"]][i] if (ok) { # more features, previous performance score - intermediate_point = data.table(n_features = pf[["n_features"]][i+1]) + intermediate_point = data.table(n_features = pf[["n_features"]][i + 1]) intermediate_point[, (measure_id) := pf[[measure_id]][i]] pf_step = rbind(pf_step, intermediate_point) } diff --git a/R/Filter.R b/R/Filter.R index 7f6927c9..8d4c6ce4 100644 --- a/R/Filter.R +++ b/R/Filter.R @@ -28,22 +28,28 @@ #' head(fortify(f)) #' autoplot(f, n = 5) #' } -autoplot.Filter = function(object, type = "boxplot", n = Inf, theme = theme_minimal(), ...) 
{ # nolint +#nolint next +autoplot.Filter = function(object, type = "boxplot", n = Inf, theme = theme_minimal(), ...) { assert_choice(type, choices = c("boxplot"), null.ok = FALSE) data = head(fortify(object), n) - switch(type, + switch( + type, "boxplot" = { - ggplot(data, + ggplot( + data, mapping = aes( x = .data[["feature"]], - y = .data[["score"]])) + + y = .data[["score"]] + ) + ) + geom_bar( stat = "identity", fill = viridis::viridis(1, begin = 0.5), alpha = 0.8, - color = "#000000") + + color = "#000000" + ) + scale_x_discrete(limits = data$feature) + labs(x = "Feature", y = "Score") + theme + @@ -60,6 +66,7 @@ plot.Filter = function(x, ...) { } #' @export -fortify.Filter = function(model, data = NULL, ...) { # nolint +#nolint next +fortify.Filter = function(model, data = NULL, ...) { as.data.table(model) } diff --git a/R/LearnerClassif.R b/R/LearnerClassif.R index 16534fae..458b7af0 100644 --- a/R/LearnerClassif.R +++ b/R/LearnerClassif.R @@ -27,16 +27,29 @@ #' #' autoplot(learner, type = "prediction", task) #' } -autoplot.LearnerClassif = function(object, type = "prediction", task, grid_points = 100L, expand_range = 0, theme = theme_minimal(), ...) { # nolint +#nolint next +autoplot.LearnerClassif = function( + object, + type = "prediction", + task, + grid_points = 100L, + expand_range = 0, + theme = theme_minimal(), + ... 
+) { assert_choice(type, choices = c("prediction"), null.ok = FALSE) - switch(type, + switch( + type, "prediction" = { mlr3::assert_task(task) features = task$feature_names if (length(features) != 2L) { - mlr3misc::stopf("Plot learner prediction only works for tasks with two features for classification!", wrap = TRUE) + mlr3misc::stopf( + "Plot learner prediction only works for tasks with two features for classification!", + wrap = TRUE + ) } grid = predict_grid(list(object), task, grid_points = grid_points, expand_range = expand_range) @@ -45,10 +58,12 @@ autoplot.LearnerClassif = function(object, type = "prediction", task, grid_point # classif, probs raster_aes = aes( fill = .data[["response"]], - alpha = .data[[".prob.response"]]) + alpha = .data[[".prob.response"]] + ) scale_alpha = scale_alpha_continuous( name = "Probability", - guide = guide_legend(override.aes = list(fill = viridis::viridis(1)))) + guide = guide_legend(override.aes = list(fill = viridis::viridis(1))) + ) scale_fill = scale_fill_viridis_d(end = 0.8) guides = NULL } else if (object$predict_type == "response") { @@ -59,16 +74,20 @@ autoplot.LearnerClassif = function(object, type = "prediction", task, grid_point guides = NULL } - ggplot(grid, + ggplot( + grid, mapping = aes( x = .data[[features[1L]]], - y = .data[[features[2L]]])) + + y = .data[[features[2L]]] + ) + ) + geom_raster(raster_aes) + geom_point( mapping = aes(fill = .data[[task$target_names]]), data = task$data(), shape = 21, - color = "black") + + color = "black" + ) + scale_fill + guides + theme + diff --git a/R/LearnerClassifCVGlmnet.R b/R/LearnerClassifCVGlmnet.R index c95f6772..257dc768 100644 --- a/R/LearnerClassifCVGlmnet.R +++ b/R/LearnerClassifCVGlmnet.R @@ -1,9 +1,19 @@ #' @rdname autoplot.LearnerClassifGlmnet #' @export -autoplot.LearnerClassifCVGlmnet = function(object, type = "prediction", task = NULL, grid_points = 100L, expand_range = 0, theme = theme_minimal(), ...) 
{ # nolint +#nolint next +autoplot.LearnerClassifCVGlmnet = function( + object, + type = "prediction", + task = NULL, + grid_points = 100L, + expand_range = 0, + theme = theme_minimal(), + ... +) { assert_choice(type, choices = c("prediction", "ggfortify"), null.ok = FALSE) - switch(type, + switch( + type, "prediction" = { NextMethod() }, diff --git a/R/LearnerClassifGlmnet.R b/R/LearnerClassifGlmnet.R index 6d870c6f..879b1bf6 100644 --- a/R/LearnerClassifGlmnet.R +++ b/R/LearnerClassifGlmnet.R @@ -8,7 +8,8 @@ #' * `"prediction"` (default): Decision boundary of the learner and the true class labels. #' * `"ggfortify"`: Visualizes the model using the package \CRANpkg{ggfortify}. #' -#' @param object ([mlr3learners::LearnerClassifGlmnet] | [mlr3learners::LearnerRegrGlmnet] | [mlr3learners::LearnerClassifCVGlmnet] | [mlr3learners::LearnerRegrCVGlmnet]). +#' @param object ([mlr3learners::LearnerClassifGlmnet] | [mlr3learners::LearnerRegrGlmnet] | +#' [mlr3learners::LearnerClassifCVGlmnet] | [mlr3learners::LearnerRegrCVGlmnet]). #' #' @template param_type #' @template param_task @@ -41,11 +42,21 @@ #' autoplot(learner, type = "ggfortify") #' } #' } -autoplot.LearnerClassifGlmnet = function(object, type = "prediction", task = NULL, grid_points = 100L, expand_range = 0, theme = theme_minimal(), ...) { # nolint +#nolint next +autoplot.LearnerClassifGlmnet = function( + object, + type = "prediction", + task = NULL, + grid_points = 100L, + expand_range = 0, + theme = theme_minimal(), + ... 
+) { assert_choice(type, choices = c("prediction", "ggfortify"), null.ok = FALSE) assert_has_model(object) - switch(type, + switch( + type, "prediction" = { NextMethod() }, diff --git a/R/LearnerClassifRpart.R b/R/LearnerClassifRpart.R index 523a0cd0..266efec9 100644 --- a/R/LearnerClassifRpart.R +++ b/R/LearnerClassifRpart.R @@ -34,11 +34,21 @@ #' learner$train(task) #' autoplot(learner, type = "ggparty") #' } -autoplot.LearnerClassifRpart = function(object, type = "prediction", task = NULL, grid_points = 100L, expand_range = 0, theme = theme_minimal(), ...) { # nolint +#nolint next +autoplot.LearnerClassifRpart = function( + object, + type = "prediction", + task = NULL, + grid_points = 100L, + expand_range = 0, + theme = theme_minimal(), + ... +) { assert_choice(type, choices = c("prediction", "ggparty"), null.ok = FALSE) assert_has_model(object) - switch(type, + switch( + type, "prediction" = { NextMethod() }, @@ -53,20 +63,25 @@ autoplot.LearnerClassifRpart = function(object, type = "prediction", task = NULL ggparty::geom_node_splitvar() + ggparty::geom_node_plot( gglist = list( - geom_bar(aes(x = "", fill = .data[[target]]), - alpha = 0.8, - color = "#000000", - linewidth = 0.5, - position = position_fill()), + geom_bar( + aes(x = "", fill = .data[[target]]), + alpha = 0.8, + color = "#000000", + linewidth = 0.5, + position = position_fill() + ), labs(x = target), scale_fill_viridis_d(end = 0.8), - theme), + theme + ), ids = "terminal", - shared_axis_labels= TRUE) + + shared_axis_labels = TRUE + ) + ggparty::geom_node_label( mapping = aes(label = paste0("n=", .data[["nodesize"]])), nudge_y = 0.03, - ids = "terminal") + ids = "terminal" + ) }, stopf("Unknown plot type '%s'", type) diff --git a/R/LearnerClustHierarchical.R b/R/LearnerClustHierarchical.R index 5ba9bcb6..8d39c659 100644 --- a/R/LearnerClustHierarchical.R +++ b/R/LearnerClustHierarchical.R @@ -8,7 +8,8 @@ #' * `"dend"` (default): Dendrograms using \CRANpkg{ggdendro} package. 
#' * `"scree"`: Scree plot that shows the number of possible clusters on the x-axis and the height on the y-axis. #' -#' @param object ([mlr3cluster::LearnerClustAgnes] | [mlr3cluster::LearnerClustDiana] | [mlr3cluster::LearnerClustHclust]). +#' @param object ([mlr3cluster::LearnerClustAgnes] | [mlr3cluster::LearnerClustDiana] | +#' [mlr3cluster::LearnerClustHclust]). #' @param task ([mlr3::Task])\cr #' Optionally, pass the task to add labels of observations to a `hclust` dendrogram. #' Labels are set via the row names of the task. @@ -43,7 +44,15 @@ #' learner$train(task) #' autoplot(learner, type = "scree") #' } -autoplot.LearnerClustHierarchical = function(object, type = "dend", task = NULL, theme = theme_minimal(), theme_dendro = TRUE, ...) { # nolint +#nolint next +autoplot.LearnerClustHierarchical = function( + object, + type = "dend", + task = NULL, + theme = theme_minimal(), + theme_dendro = TRUE, + ... +) { assert_choice(type, choices = c("dend", "scree"), null.ok = FALSE) if (is.null(object$model)) { @@ -53,7 +62,8 @@ autoplot.LearnerClustHierarchical = function(object, type = "dend", task = NULL, stopf("Learner '%s' must be hierarchical", object$id) } - switch(type, + switch( + type, "dend" = { require_namespaces("ggdendro") @@ -67,13 +77,13 @@ autoplot.LearnerClustHierarchical = function(object, type = "dend", task = NULL, "scree" = { data = data.table(Height = object$model$height, Clusters = seq(length(object$model$height), 1)) - ggplot(data, - mapping = aes(x = data$Clusters, y = data$Height)) + + ggplot(data, mapping = aes(x = data$Clusters, y = data$Height)) + geom_line(color = viridis::viridis(1, begin = 0.5)) + geom_point( size = 3, color = viridis::viridis(1, begin = 0.5), - alpha = 0.8) + + alpha = 0.8 + ) + labs(x = "Clusters", y = "Height") + theme } diff --git a/R/LearnerRegr.R b/R/LearnerRegr.R index 9ba8b94d..47327a5d 100644 --- a/R/LearnerRegr.R +++ b/R/LearnerRegr.R @@ -27,10 +27,20 @@ #' #' autoplot(learner, type = "prediction", task) #' 
} -autoplot.LearnerRegr = function(object, type = "prediction", task, grid_points = 100L, expand_range = 0, theme = theme_minimal(), ...) { # nolint +#nolint next +autoplot.LearnerRegr = function( + object, + type = "prediction", + task, + grid_points = 100L, + expand_range = 0, + theme = theme_minimal(), + ... +) { assert_choice(type, choices = c("prediction"), null.ok = FALSE) - switch(type, + switch( + type, "prediction" = { mlr3::assert_task(task) features = task$feature_names @@ -46,17 +56,22 @@ autoplot.LearnerRegr = function(object, type = "prediction", task, grid_points = se_geom = geom_ribbon( mapping = aes( ymin = .data[["response"]] - .data[["se"]], - ymax = .data[["response"]] + .data[["se"]]), + ymax = .data[["response"]] + .data[["se"]] + ), alpha = 0.2, - fill = viridis::viridis(1, begin = 0.5)) + fill = viridis::viridis(1, begin = 0.5) + ) } else { se_geom = NULL } - ggplot(grid, + ggplot( + grid, mapping = aes( x = .data[[features]], - y = .data[["response"]])) + + y = .data[["response"]] + ) + ) + se_geom + geom_line(color = viridis::viridis(1, begin = 0.5)) + geom_point( @@ -64,25 +79,30 @@ autoplot.LearnerRegr = function(object, type = "prediction", task, grid_points = shape = 21, color = "black", mapping = aes( - y = .data[[task$target_names]])) + + y = .data[[task$target_names]] + ) + ) + scale_color_viridis_d(end = 0.8) + theme } else { - if (!is.numeric(grid[[features[1L]]])) { theme = theme + theme(axis.text.x = element_text(angle = 45, hjust = 1)) } - ggplot(grid, + ggplot( + grid, mapping = aes( x = .data[[features[1L]]], - y = .data[[features[2L]]])) + + y = .data[[features[2L]]] + ) + ) + geom_raster(aes(fill = .data[["response"]])) + geom_point( mapping = aes(fill = .data[[task$target_names]]), data = task$data(), shape = 21, - color = "black") + + color = "black" + ) + scale_fill_viridis_c(end = 0.8) + guides(fill = guide_colorbar(barwidth = 0.5, barheight = 10)) + theme + diff --git a/R/LearnerRegrCVGlmnet.R b/R/LearnerRegrCVGlmnet.R 
index 0400d74e..90e8cab3 100644 --- a/R/LearnerRegrCVGlmnet.R +++ b/R/LearnerRegrCVGlmnet.R @@ -1,8 +1,18 @@ #' @rdname autoplot.LearnerClassifGlmnet #' @export -autoplot.LearnerRegrCVGlmnet = function(object, type = "prediction", task = NULL, grid_points = 100L, expand_range = 0, theme = theme_minimal(), ...) { # nolint +#nolint next +autoplot.LearnerRegrCVGlmnet = function( + object, + type = "prediction", + task = NULL, + grid_points = 100L, + expand_range = 0, + theme = theme_minimal(), + ... +) { assert_choice(type, choices = c("prediction", "ggfortify"), null.ok = FALSE) - switch(type, + switch( + type, "prediction" = { NextMethod() }, diff --git a/R/LearnerRegrGlmnet.R b/R/LearnerRegrGlmnet.R index 422d87c2..3174ad0d 100644 --- a/R/LearnerRegrGlmnet.R +++ b/R/LearnerRegrGlmnet.R @@ -1,9 +1,19 @@ #' @rdname autoplot.LearnerClassifGlmnet #' @export -autoplot.LearnerRegrGlmnet = function(object, type = "prediction", task = NULL, grid_points = 100L, expand_range = 0, theme = theme_minimal(), ...) { # nolint +#nolint next +autoplot.LearnerRegrGlmnet = function( + object, + type = "prediction", + task = NULL, + grid_points = 100L, + expand_range = 0, + theme = theme_minimal(), + ... +) { assert_has_model(object) - switch(type, + switch( + type, "prediction" = { NextMethod() }, diff --git a/R/LearnerRegrRpart.R b/R/LearnerRegrRpart.R index f3a7b691..ad0eb749 100644 --- a/R/LearnerRegrRpart.R +++ b/R/LearnerRegrRpart.R @@ -1,10 +1,20 @@ #' @export #' @rdname autoplot.LearnerClassifRpart -autoplot.LearnerRegrRpart = function(object, type = "prediction", task = NULL, grid_points = 100L, expand_range = 0, theme = theme_minimal(), ...) { # nolint +#nolint next +autoplot.LearnerRegrRpart = function( + object, + type = "prediction", + task = NULL, + grid_points = 100L, + expand_range = 0, + theme = theme_minimal(), + ... 
+) { assert_choice(type, choices = c("prediction", "ggparty"), null.ok = FALSE) assert_has_model(object) - switch(type, + switch( + type, "prediction" = { NextMethod() }, @@ -19,21 +29,26 @@ autoplot.LearnerRegrRpart = function(object, type = "prediction", task = NULL, g ggparty::geom_node_splitvar() + ggparty::geom_node_plot( gglist = list( - geom_boxplot(aes(y = .data[[target]]), + geom_boxplot( + aes(y = .data[[target]]), fill = viridis::viridis(1, begin = 0.5), alpha = 0.8, color = "#000000", - linewidth = 0.5), + linewidth = 0.5 + ), scale_x_discrete(), theme, theme( axis.ticks.x = element_blank(), axis.text.x = element_blank() - ))) + + ) + ) + ) + ggparty::geom_node_label( aes(label = paste0("n=", .data[["nodesize"]])), nudge_y = 0.03, - ids = "terminal") + ids = "terminal" + ) }, stopf("Unknown plot type '%s'", type) diff --git a/R/OptimInstanceBatchSingleCrit.R b/R/OptimInstanceBatchSingleCrit.R index 80f1ec1b..596ebe74 100644 --- a/R/OptimInstanceBatchSingleCrit.R +++ b/R/OptimInstanceBatchSingleCrit.R @@ -85,9 +85,23 @@ #' print(autoplot(instance, type = "incumbent")) #' } #' } -autoplot.OptimInstanceBatchSingleCrit = function(object, type = "marginal", cols_x = NULL, trafo = FALSE, learner = mlr3::lrn("regr.ranger"), grid_resolution = 100, batch = NULL, theme = theme_minimal(), ...) { # nolint - assert_choice(type, choices = c("marginal", "performance", "parameter", "parallel", - "points", "surface", "pairs", "incumbent"), null.ok = FALSE) +#nolint next +autoplot.OptimInstanceBatchSingleCrit = function( + object, + type = "marginal", + cols_x = NULL, + trafo = FALSE, + learner = mlr3::lrn("regr.ranger"), + grid_resolution = 100, + batch = NULL, + theme = theme_minimal(), + ... 
+) { + assert_choice( + type, + choices = c("marginal", "performance", "parameter", "parallel", "points", "surface", "pairs", "incumbent"), + null.ok = FALSE + ) assert_subset(cols_x, c(object$archive$cols_x, paste0("x_domain_", object$archive$cols_x))) assert_flag(trafo) @@ -100,11 +114,14 @@ autoplot.OptimInstanceBatchSingleCrit = function(object, type = "marginal", cols } cols_y = object$archive$cols_y data = fortify(object) - if (is.null(batch)) batch = seq_len(object$archive$n_batch) + if (is.null(batch)) { + batch = seq_len(object$archive$n_batch) + } assert_subset(batch, seq_len(object$archive$n_batch)) data = data[list(batch), , on = "batch_nr"] - switch(type, + switch( + type, "marginal" = { # each parameter versus performance plots = map(cols_x, function(x) { @@ -114,16 +131,14 @@ autoplot.OptimInstanceBatchSingleCrit = function(object, type = "marginal", cols breaks[length(breaks)] = max(data_i$batch_nr) data_i[, "batch_nr" := as.factor(get("batch_nr"))] - ggplot(data_i, - mapping = aes(x = .data[[x]], - y = .data[[cols_y]]) - ) + + ggplot(data_i, mapping = aes(x = .data[[x]], y = .data[[cols_y]])) + geom_point( mapping = aes(fill = .data$batch_nr), shape = 21, size = 3, stroke = 0.5, - alpha = 0.8) + + alpha = 0.8 + ) + scale_fill_viridis_d("Batch", breaks = breaks) + theme }) @@ -140,22 +155,28 @@ autoplot.OptimInstanceBatchSingleCrit = function(object, type = "marginal", cols top_batch[, "group" := factor(1, labels = "Best value")] ggplot() + - geom_line(top_batch, + geom_line( + top_batch, mapping = aes( x = .data[["batch_nr"]], y = .data[[cols_y]], - color = .data[["group"]]), + color = .data[["group"]] + ), group = 1, - linewidth = 1) + - geom_point(data, + linewidth = 1 + ) + + geom_point( + data, mapping = aes( x = .data[["batch_nr"]], y = .data[[cols_y]], - fill = .data[["group"]]), + fill = .data[["group"]] + ), shape = 21, size = 3, stroke = 0.5, - alpha = 0.8) + + alpha = 0.8 + ) + labs(x = "Batch") + scale_y_continuous(breaks = 
pretty_breaks()) + scale_fill_manual(values = viridis::viridis(1, begin = 0.33)) + @@ -167,17 +188,22 @@ autoplot.OptimInstanceBatchSingleCrit = function(object, type = "marginal", cols "parameter" = { # each parameter versus iteration plots = map(cols_x, function(x) { - ggplot(data, + ggplot( + data, mapping = aes( x = .data$batch_nr, - y = .data[[x]])) + + y = .data[[x]] + ) + ) + geom_point( mapping = aes( - fill = .data[[cols_y]]), - shape = 21, - size = 3, - stroke = 0.5, - alpha = 0.8) + + fill = .data[[cols_y]] + ), + shape = 21, + size = 3, + stroke = 0.5, + alpha = 0.8 + ) + guides(fill = guide_colorbar(barwidth = 0.5, barheight = 10)) + scale_fill_viridis_c(breaks = scales::pretty_breaks()) + theme @@ -204,7 +230,9 @@ autoplot.OptimInstanceBatchSingleCrit = function(object, type = "marginal", cols # rescale data_n = data_n[, lapply(.SD, function(x) if (sd(x) > 0) (x - mean(x)) / sd(x) else rep(0, length(x)))] - data_c = data_c[, lapply(.SD, function(x) if (sd(x) > 0) (x - mean(unique(x))) / sd(unique(x)) else rep(0, length(x)))] + data_c = data_c[, lapply(.SD, function(x) { + if (sd(x) > 0) (x - mean(unique(x))) / sd(unique(x)) else rep(0, length(x)) + })] # to long format set(data_n, j = "id", value = seq_row(data_n)) @@ -226,20 +254,29 @@ autoplot.OptimInstanceBatchSingleCrit = function(object, type = "marginal", cols data = merge(data, data_y, by = "id") setorderv(data, "x") - ggplot(data, + ggplot( + data, mapping = aes( x = .data[["x"]], - y = .data[["value"]])) + + y = .data[["value"]] + ) + ) + geom_line( mapping = aes( group = .data$id, - color = .data[[cols_y]]), - linewidth = 1) + + color = .data[[cols_y]] + ), + linewidth = 1 + ) + geom_vline(aes(xintercept = x)) + + #nolint next { - if (nrow(data_c)) geom_label( - mapping = aes(label = .data$label), - data = data[!is.na(data$label), ]) + if (nrow(data_c)) { + geom_label( + mapping = aes(label = .data$label), + data = data[!is.na(data$label), ] + ) + } } + scale_x_continuous(breaks = x_axis$x, 
labels = x_axis$variable) + scale_color_viridis_c() + @@ -253,16 +290,20 @@ autoplot.OptimInstanceBatchSingleCrit = function(object, type = "marginal", cols stop("Scatter plots can only be drawn with 2 parameters.") } - ggplot(data, + ggplot( + data, mapping = aes( x = .data[[cols_x[1]]], - y = .data[[cols_x[2]]])) + + y = .data[[cols_x[2]]] + ) + ) + geom_point( mapping = aes(fill = .data[[cols_y]]), data = data, shape = 21, size = 3, - stroke = 1) + + stroke = 1 + ) + scale_fill_viridis_c() + guides(fill = guide_colorbar(barwidth = 0.5, barheight = 10)) + theme @@ -292,19 +333,24 @@ autoplot.OptimInstanceBatchSingleCrit = function(object, type = "marginal", cols setDT(data_i)[, (cols_y) := learner$predict_newdata(data_i)$response] - ggplot(data_i, + ggplot( + data_i, mapping = aes( x = .data[[cols_x[1]]], - y = .data[[cols_x[2]]])) + + y = .data[[cols_x[2]]] + ) + ) + geom_raster( - mapping = aes(fill = .data[[cols_y]])) + + mapping = aes(fill = .data[[cols_y]]) + ) + geom_point( mapping = aes(fill = .data[[cols_y]]), data = data, shape = 21, size = 3, stroke = 0.5, - alpha = 0.8) + + alpha = 0.8 + ) + scale_x_continuous(expand = c(0.01, 0.01)) + scale_y_continuous(expand = c(0.01, 0.01)) + guides(fill = guide_colorbar(barwidth = 0.5, barheight = 10)) + @@ -318,25 +364,45 @@ autoplot.OptimInstanceBatchSingleCrit = function(object, type = "marginal", cols color = viridis::viridis(1, begin = 0.5) alpha = 0.8 - GGally::ggpairs(data[, c(cols_x, cols_y, "batch_nr"), with = FALSE], + GGally::ggpairs( + data[, c(cols_x, cols_y, "batch_nr"), with = FALSE], switch = "both", - upper = list(continuous = "cor", combo = GGally::wrap("box_no_facet", fill = color, alpha = alpha), discrete = "count", na = "na"), - lower = list(continuous = GGally::wrap("points", color = color), combo = GGally::wrap("facethist", fill = color, alpha = alpha), discrete = GGally::wrap("facetbar", fill = color, alpha = alpha), na = "na"), - diag = list(continuous = GGally::wrap("densityDiag", color = 
color), discrete = GGally::wrap("barDiag", fill = color, alpha = alpha), na = "naDiag")) + + upper = list( + continuous = "cor", + combo = GGally::wrap("box_no_facet", fill = color, alpha = alpha), + discrete = "count", + na = "na" + ), + lower = list( + continuous = GGally::wrap("points", color = color), + combo = GGally::wrap("facethist", fill = color, alpha = alpha), + discrete = GGally::wrap("facetbar", fill = color, alpha = alpha), + na = "na" + ), + diag = list( + continuous = GGally::wrap("densityDiag", color = color), + discrete = GGally::wrap("barDiag", fill = color, alpha = alpha), + na = "naDiag" + ) + ) + theme }, "incumbent" = { data[, "incumbent" := cummin(.SD[[1]]), .SDcols = cols_y] - ggplot(data, + ggplot( + data, mapping = aes( x = seq_row(data), y = .data[["incumbent"]], - lty = cols_y)) + + lty = cols_y + ) + ) + geom_step( linewidth = 1, - color = viridis::viridis(1, begin = 0.5)) + + color = viridis::viridis(1, begin = 0.5) + ) + labs(x = "Number of Configurations", y = cols_y) + scale_linetype(name = "Incumbent") + theme @@ -347,6 +413,7 @@ autoplot.OptimInstanceBatchSingleCrit = function(object, type = "marginal", cols } #' @export -fortify.OptimInstanceBatchSingleCrit = function(model, data = NULL, ...) { # nolint +#nolint next +fortify.OptimInstanceBatchSingleCrit = function(model, data = NULL, ...) { as.data.table(model$archive, unnest = "x_domain") } diff --git a/R/Prediction.R b/R/Prediction.R index aaea3fbc..785dc8de 100644 --- a/R/Prediction.R +++ b/R/Prediction.R @@ -1,4 +1,5 @@ #' @export -fortify.Prediction = function(model, data, ...) { # nolint +#nolint next +fortify.Prediction = function(model, data, ...) { as.data.table(model) } diff --git a/R/PredictionClassif.R b/R/PredictionClassif.R index 36389de4..0ceb33ea 100644 --- a/R/PredictionClassif.R +++ b/R/PredictionClassif.R @@ -10,7 +10,8 @@ #' Requires package \CRANpkg{precrec}. #' * `"prc"`: Precision recall curve. #' Requires package \CRANpkg{precrec}. 
-#' * `"threshold"`: Systematically varies the threshold of the [mlr3::PredictionClassif] object and plots the resulting performance as returned by `measure`. +#' * `"threshold"`: Systematically varies the threshold of the [mlr3::PredictionClassif] object and plots the +#' resulting performance as returned by `measure`. #' #' @param object ([mlr3::PredictionClassif]). #' @template param_type @@ -37,25 +38,32 @@ #' autoplot(object, type = "prc") #' } #' } -autoplot.PredictionClassif = function(object, type = "stacked", measure = NULL, theme = theme_minimal(), ...) { # nolint +#nolint next +autoplot.PredictionClassif = function(object, type = "stacked", measure = NULL, theme = theme_minimal(), ...) { assert_choice(type, choices = c("stacked", "roc", "prc", "threshold"), null.ok = FALSE) - switch(type, + switch( + type, "stacked" = { tab = melt(fortify(object)[, c("truth", "response")], measure.vars = c("truth", "response")) - ggplot(tab, + ggplot( + tab, mapping = aes( fill = .data[["value"]], - x = .data[["variable"]])) + + x = .data[["variable"]] + ) + ) + geom_bar( width = 0.5, color = "#000000", - alpha = 0.8) + + alpha = 0.8 + ) + geom_text( mapping = aes(label = after_stat(count)), stat = "count", position = position_stack(vjust = 0.5), - color = "#000000") + + color = "#000000" + ) + labs(x = "Feature", y = "Count") + scale_fill_viridis_d("Feature", end = 0.8) + theme @@ -67,7 +75,6 @@ autoplot.PredictionClassif = function(object, type = "stacked", measure = NULL, theme + theme(legend.position = "none") + labs(title = NULL) - }, "prc" = { @@ -83,10 +90,13 @@ autoplot.PredictionClassif = function(object, type = "stacked", measure = NULL, pred = object$clone(deep = TRUE) tab = data.table(prob = seq(from = 0, to = 1, by = 0.01)) tab$score = map_dbl(tab$prob, function(p) pred$set_threshold(p)$score(measure)) - ggplot(tab, + ggplot( + tab, mapping = aes( x = .data[["prob"]], - y = .data[["score"]])) + + y = .data[["score"]] + ) + ) + geom_line(color = 
viridis::viridis(1, begin = 0.5)) + labs(x = "Probability Threshold", y = measure$id) + scale_color_viridis_d() + diff --git a/R/PredictionClust.R b/R/PredictionClust.R index b8580df2..5fbb98a0 100644 --- a/R/PredictionClust.R +++ b/R/PredictionClust.R @@ -36,10 +36,12 @@ #' head(fortify(object)) #' autoplot(object, task) #' } -autoplot.PredictionClust = function(object, task, row_ids = NULL, type = "scatter", theme = theme_minimal(), ...) { # nolint +#nolint next +autoplot.PredictionClust = function(object, task, row_ids = NULL, type = "scatter", theme = theme_minimal(), ...) { assert_choice(type, choices = c("scatter", "sil", "pca"), null.ok = FALSE) - switch(type, + switch( + type, "scatter" = { require_namespaces("GGally") @@ -77,7 +79,8 @@ autoplot.PredictionClust = function(object, task, row_ids = NULL, type = "scatte require_namespaces("ggfortify") d = data.frame( row_ids = object$data$row_id, - cluster = as.factor(object$data$partition)) + cluster = as.factor(object$data$partition) + ) if (is.null(row_ids)) { task_data = data.table(task$data(), row_ids = task$row_ids) @@ -86,10 +89,7 @@ autoplot.PredictionClust = function(object, task, row_ids = NULL, type = "scatte } plot_data = merge(task_data, d, by = "row_ids") - ggplot2::autoplot(stats::prcomp(task_data[, -"row_ids"]), - data = plot_data, - colour = "cluster", - size = 3) + + ggplot2::autoplot(stats::prcomp(task_data[, -"row_ids"]), data = plot_data, colour = "cluster", size = 3) + scale_color_viridis_d("Cluster", end = 0.8, alpha = 0.8) + theme }, diff --git a/R/PredictionRegr.R b/R/PredictionRegr.R index ea99df66..9a8bc7f6 100644 --- a/R/PredictionRegr.R +++ b/R/PredictionRegr.R @@ -6,12 +6,15 @@ #' Possible choices are: #' #' * `"xy"` (default): Scatterplot of "true" response vs. "predicted" response. -#' By default a linear model is fitted via `geom_smooth(method = "lm")` to visualize the trend between x and y (by default colored blue). 
+#' By default a linear model is fitted via `geom_smooth(method = "lm")` to visualize the trend between x and y +#' (by default colored blue). #' In addition `geom_abline()` with `slope = 1` is added to the plot. #' Note that `geom_smooth()` and `geom_abline()` may overlap, depending on the given data. #' * `"histogram"`: Histogram of residuals: \eqn{r = y - \hat{y}}{r = y - y.hat}. -#' * `"residual"`: Plot of the residuals, with the response \eqn{\hat{y}}{y.hat} on the "x" and the residuals on the "y" axis. -#' By default a linear model is fitted via `geom_smooth(method = "lm")` to visualize the trend between x and y (by default colored blue). +#' * `"residual"`: Plot of the residuals, with the response \eqn{\hat{y}}{y.hat} on the "x" and the residuals on +#' the "y" axis. +#' By default a linear model is fitted via `geom_smooth(method = "lm")` to visualize the trend between x and y +#' (by default colored blue). #' * `"confidence"`: Scatterplot of "true" response vs. "predicted" response with #' confidence intervals. Error bars calculated as object$response +- quantile * object$se and so only #' possible with `predict_type = "se"`. `geom_abline()` with `slope = 1` is added to the plot. @@ -45,58 +48,81 @@ #' object = learner$train(task)$predict(task) #' autoplot(object, type = "confidence") #' } -autoplot.PredictionRegr = function(object, type = "xy", binwidth = NULL, theme = theme_minimal(), quantile = 1.96, ...) { +autoplot.PredictionRegr = function( + object, + type = "xy", + binwidth = NULL, + theme = theme_minimal(), + quantile = 1.96, + ... 
+) { assert_choice(type, choices = c("xy", "histogram", "residual", "confidence"), null.ok = FALSE) - switch(type, + switch( + type, "xy" = { - ggplot(object, + ggplot( + object, mapping = aes( x = .data[["response"]], - y = .data[["truth"]])) + + y = .data[["truth"]] + ) + ) + geom_abline( slope = 1, - alpha = 0.5) + + alpha = 0.5 + ) + geom_point( color = viridis::viridis(1, begin = 0.33), - alpha = 0.8) + + alpha = 0.8 + ) + geom_rug(sides = "bl") + geom_smooth( formula = y ~ x, method = "lm", - color = viridis::viridis(1, begin = 0.5)) + + color = viridis::viridis(1, begin = 0.5) + ) + theme }, "histogram" = { object = ggplot2::fortify(object) - ggplot(object, + ggplot( + object, mapping = aes( x = .data[["truth"]] - .data[["response"]], - y = after_stat(.data[["density"]]))) + + y = after_stat(.data[["density"]]) + ) + ) + geom_histogram( fill = viridis::viridis(1, begin = 0.5), alpha = 0.8, color = "black", - binwidth = binwidth) + + binwidth = binwidth + ) + labs(x = "Residuals", y = "Density") + theme }, "residual" = { - ggplot(object, + ggplot( + object, mapping = aes( x = .data[["response"]], - y = .data[["truth"]] - .data[["response"]])) + + y = .data[["truth"]] - .data[["response"]] + ) + ) + geom_point( color = viridis::viridis(1, begin = 0.33), - alpha = 0.8) + + alpha = 0.8 + ) + geom_rug(sides = "bl") + geom_smooth( formula = y ~ x, method = "lm", fill = viridis::viridis(1, begin = 0.5), - color = viridis::viridis(1, begin = 0.5)) + + color = viridis::viridis(1, begin = 0.5) + ) + labs(x = "Response", y = "Residuals") + theme }, @@ -109,22 +135,29 @@ autoplot.PredictionRegr = function(object, type = "xy", binwidth = NULL, theme = df = data.frame( lower = object$response - quantile * object$se, central = object$response, - upper = object$response + quantile * object$se, truth = object$truth) + upper = object$response + quantile * object$se, + truth = object$truth + ) - ggplot(df, + ggplot( + df, mapping = aes( x = .data[["central"]], xmin = 
.data[["lower"]], xmax = .data[["upper"]], - y = .data[["truth"]])) + + y = .data[["truth"]] + ) + ) + geom_abline( slope = 1, colour = "grey", - linetype = 3) + + linetype = 3 + ) + geom_linerange(color = viridis::viridis(1, begin = 0.33)) + geom_point( color = viridis::viridis(1, begin = 0.5), - alpha = 0.8) + + alpha = 0.8 + ) + labs(x = sprintf("Response \u00B1 %sse", quantile), y = "Truth") + theme }, diff --git a/R/ResampleResult.R b/R/ResampleResult.R index 47614e93..e1064081 100644 --- a/R/ResampleResult.R +++ b/R/ResampleResult.R @@ -8,13 +8,15 @@ #' * `"boxplot"` (default): Boxplot of performance measures. #' * `"histogram"`: Histogram of performance measures. #' * `"roc"`: ROC curve (1 - specificity on x, sensitivity on y). -#' The predictions of the individual [mlr3::Resampling]s are merged prior to calculating the ROC curve (micro averaged). +#' The predictions of the individual [mlr3::Resampling]s are merged prior to calculating the ROC curve +#' (micro averaged). #' Requires package \CRANpkg{precrec}. #' * `"prc"`: Precision recall curve. #' See `"roc"`. #' * `"prediction"`: Plots the learner prediction for a grid of points. #' Needs models to be stored. Set `store_models = TRUE` for [mlr3::resample()]. -#' For classification, we support tasks with exactly two features and learners with `predict_type=` set to `"response"` or `"prob"`. +#' For classification, we support tasks with exactly two features and learners with `predict_type=` set to +#' `"response"` or `"prob"`. #' For regression, we support tasks with one or two features. #' For tasks with one feature we can print confidence bounds if the predict type of the learner was set to `"se"`. #' For tasks with two features the predict type will be ignored. @@ -70,21 +72,29 @@ #' autoplot(object, type = "prediction") #' } #' } -autoplot.ResampleResult = function(object, type = "boxplot", measure = NULL, predict_sets = "test", binwidth = NULL, theme = theme_minimal(), ...) 
{ +autoplot.ResampleResult = function( + object, + type = "boxplot", + measure = NULL, + predict_sets = "test", + binwidth = NULL, + theme = theme_minimal(), + ... +) { assert_choice(type, choices = c("boxplot", "histogram", "prediction", "roc", "prc"), null.ok = FALSE) task = object$task measure = mlr3::assert_measure(mlr3::as_measure(measure, task_type = task$task_type), task = task) - switch(type, + switch( + type, "boxplot" = { - ggplot(object, - measure = measure, - mapping = aes(y = .data[["performance"]])) + + ggplot(object, measure = measure, mapping = aes(y = .data[["performance"]])) + geom_boxplot( fill = viridis::viridis(1, begin = 0.5), alpha = 0.8, - show.legend = FALSE) + + show.legend = FALSE + ) + scale_x_discrete() + labs(y = measure$id) + theme + @@ -92,14 +102,13 @@ autoplot.ResampleResult = function(object, type = "boxplot", measure = NULL, pre }, "histogram" = { - ggplot(object, - measure = measure, - aes(x = .data[["performance"]])) + + ggplot(object, measure = measure, aes(x = .data[["performance"]])) + geom_histogram( fill = viridis::viridis(1, begin = 0.5), alpha = 0.8, color = "black", - binwidth = binwidth) + + binwidth = binwidth + ) + labs(x = measure$id, y = "Count") + theme }, @@ -111,7 +120,8 @@ autoplot.ResampleResult = function(object, type = "boxplot", measure = NULL, pre p + guides( color = "none", - fill = "none") + + fill = "none" + ) + scale_color_viridis_d("Learner", begin = 0.5) + scale_fill_viridis_d("Learner", begin = 0.5) + theme + @@ -126,7 +136,8 @@ autoplot.ResampleResult = function(object, type = "boxplot", measure = NULL, pre p + guides( color = "none", - fill = "none") + + fill = "none" + ) + scale_color_viridis_d("Learner", begin = 0.5) + scale_fill_viridis_d("Learner", begin = 0.5) + theme + @@ -145,17 +156,22 @@ plot.ResampleResult = function(x, ...) { } #' @export -fortify.ResampleResult = function(model, data, measure = NULL, ...) 
{ # nolint +#nolint next +fortify.ResampleResult = function(model, data, measure = NULL, ...) { task = model$task measure = mlr3::assert_measure(mlr3::as_measure(measure, task_type = task$task_type), task = task) data = model$score(measure)[, c("iteration", measure$id), with = FALSE] - melt(data, - measure.vars = measure$id, - variable.name = "measure_id", value.name = "performance") + melt(data, measure.vars = measure$id, variable.name = "measure_id", value.name = "performance") } -plot_learner_prediction_resample_result = function(object, predict_sets, grid_points = 100L, expand_range = 0, theme = theme_minimal()) { - +#nolint next +plot_learner_prediction_resample_result = function( + object, + predict_sets, + grid_points = 100L, + expand_range = 0, + theme = theme_minimal() +) { task = object$task task_type = task$task_type features = task$feature_names @@ -177,7 +193,7 @@ plot_learner_prediction_resample_result = function(object, predict_sets, grid_po mlr3misc::stopf("Plot learner prediction only works with one or two features for regression!", wrap = TRUE) } - grid = predict_grid(learners, task, grid_points = grid_points, expand_range = expand_range) + grid = predict_grid(learners, task, grid_points = grid_points, expand_range = expand_range) # facets for multiple resampling iterations if (length(learners) > 1L) { @@ -193,29 +209,38 @@ plot_learner_prediction_resample_result = function(object, predict_sets, grid_po if (task_type == "regr" && dim == 1L) { if (learners[[1L]]$predict_type == "se") { se_geom = geom_ribbon( - mapping = aes( - ymin = .data[["response"]] - .data[["se"]], - ymax = .data[["response"]] + .data[["se"]]), - alpha = 0.2, - fill = viridis::viridis(1, begin = 0.5)) + mapping = aes( + ymin = .data[["response"]] - .data[["se"]], + ymax = .data[["response"]] + .data[["se"]] + ), + alpha = 0.2, + fill = viridis::viridis(1, begin = 0.5) + ) } else { se_geom = NULL } - g = ggplot(grid, + g = ggplot( + grid, mapping = aes( x = .data[[features]], - 
y = .data[["response"]])) + + y = .data[["response"]] + ) + ) + se_geom + geom_line(color = viridis::viridis(1, begin = 0.5)) + - geom_point(data = task_data(object, predict_sets), + geom_point( + data = task_data(object, predict_sets), mapping = aes( y = .data[[task$target_names]], shape = .data[[".predict_set"]], - color = .data[[".predict_set"]])) + + color = .data[[".predict_set"]] + ) + ) + scale_shape_manual( values = c(train = 16, test = 15, both = 17), - name = "Set") + + name = "Set" + ) + labs(color = "Set") + scale_color_viridis_d(end = 0.8) + theme + @@ -227,10 +252,12 @@ plot_learner_prediction_resample_result = function(object, predict_sets, grid_po # classif, probs raster_aes = aes( fill = .data[["response"]], - alpha = .data[[".prob.response"]]) + alpha = .data[[".prob.response"]] + ) scale_alpha = scale_alpha_continuous( name = "Probability", - guide = guide_legend(override.aes = list(fill = viridis::viridis(1)))) + guide = guide_legend(override.aes = list(fill = viridis::viridis(1))) + ) scale_fill = scale_fill_viridis_d(end = 0.8) guides = NULL } else if (task_type == "classif" && learners[[1L]]$predict_type == "response") { @@ -251,22 +278,27 @@ plot_learner_prediction_resample_result = function(object, predict_sets, grid_po theme = theme + theme(axis.text.x = element_text(angle = 45, hjust = 1)) } - g = ggplot(grid, + g = ggplot( + grid, mapping = aes( x = .data[[features[1L]]], - y = .data[[features[2L]]])) + + y = .data[[features[2L]]] + ) + ) + geom_raster(raster_aes) + geom_point( mapping = aes(fill = .data[[task$target_names]], shape = .data[[".predict_set"]]), data = task_data(object, predict_sets), - color = "black") + + color = "black" + ) + scale_fill + guides + theme + theme(legend.position = "right") + scale_shape_manual( values = c(train = 21, test = 22, both = 23), - name = "Set") + + name = "Set" + ) + scale_alpha + labs(fill = "Response") + folds_facet @@ -282,7 +314,6 @@ plot_learner_prediction_resample_result = function(object, 
predict_sets, grid_po # object: ResampleResult # predict_sets: see above task_data = function(object, predict_sets) { - # if train and test is in predict_sets, allow "both" to be plotted if (all(c("train", "test") %in% predict_sets) && "both" %nin% predict_sets) { @@ -294,8 +325,9 @@ task_data = function(object, predict_sets) { types = lapply(seq_along(object$learners), function(i) { ids = seq_len(object$task$nrow) - type = (ids %in% object$resampling$train_set(i)) + 2L * - (ids %in% object$resampling$test_set(i)) + type = (ids %in% object$resampling$train_set(i)) + + 2L * + (ids %in% object$resampling$test_set(i)) type = type_char[type + 1L] select_ids = !is.na(type) data.table(.row_id = ids[select_ids], .predict_set = type[select_ids]) @@ -303,7 +335,7 @@ task_data = function(object, predict_sets) { types = rbindlist(types, idcol = TRUE, use.names = FALSE) data = cbind(types, object$task$data()[types$.row_id, ]) - return(remove_named(data, ".row_id")) + remove_named(data, ".row_id") } # Generates an evenly distributed sequence of the same type as the input vector. 
@@ -316,8 +348,10 @@ sequenize = function(x, n, expand_range = 0) { r = range(x, na.rm = TRUE) d = diff(r) res = seq( - from = r[1L] - expand_range * d, to = r[2L] + expand_range * d, - length.out = n) + from = r[1L] - expand_range * d, + to = r[2L] + expand_range * d, + length.out = n + ) if (is.integer(x)) { res = unique(as.integer(round(res))) } @@ -350,18 +384,32 @@ predict_grid = function(learners, task, grid_points, expand_range) { grid = cross_join(grid, sorted = FALSE) grid = cbind( grid, - remove_named(as.data.table(learner$predict_newdata( - newdata = grid, - task = task)), c("row_id", "truth"))) + remove_named( + as.data.table(learner$predict_newdata( + newdata = grid, + task = task + )), + c("row_id", "truth") + ) + ) }) grid = rbindlist(grids, idcol = TRUE, use.names = FALSE) # reduce to prob columns to one column for the predicted class if (learners[[1]]$predict_type == "prob") { - grid[, ".prob.response" := .SD[, paste0( - "prob.", # nolint - get("response")), with = FALSE], by = "response"] + grid[, + #nolint next + ".prob.response" := .SD[, + paste0( + #nolint next + "prob.", + get("response") + ), + with = FALSE + ], + by = "response" + ] } - return(grid) + grid } diff --git a/R/Task.R b/R/Task.R index b8bee359..f42e6900 100644 --- a/R/Task.R +++ b/R/Task.R @@ -1,4 +1,5 @@ #' @export -fortify.Task = function(model, data = NULL, ...) { # nolint +#nolint next +fortify.Task = function(model, data = NULL, ...) { as.data.table(model) } diff --git a/R/TaskClassif.R b/R/TaskClassif.R index e4703eb4..3e9b394e 100644 --- a/R/TaskClassif.R +++ b/R/TaskClassif.R @@ -28,21 +28,27 @@ #' autoplot(task$clone()$select(c("Sepal.Length", "Sepal.Width")), type = "pairs") #' autoplot(task, type = "duo") #' } -autoplot.TaskClassif = function(object, type = "target", theme = theme_minimal(), ...) { # nolint +#nolint next +autoplot.TaskClassif = function(object, type = "target", theme = theme_minimal(), ...) 
{ assert_choice(type, choices = c("target", "duo", "pairs"), null.ok = FALSE) target = object$target_names - switch(type, + switch( + type, "target" = { - ggplot(object, + ggplot( + object, mapping = aes( x = .data[[target]], - fill = .data[[target]])) + + fill = .data[[target]] + ) + ) + geom_bar( stat = "count", color = "#000000", - linewidth = 0.5) + + linewidth = 0.5 + ) + scale_fill_viridis_d(end = 0.8, alpha = 0.8, ) + scale_color_viridis_d(end = 0.8) + theme @@ -51,10 +57,12 @@ autoplot.TaskClassif = function(object, type = "target", theme = theme_minimal() "duo" = { # Line width!!! require_namespaces("GGally") - GGally::ggduo(object, + GGally::ggduo( + object, columnsX = target, columnsY = object$feature_names, - mapping = aes(color = .data[[target]])) + + mapping = aes(color = .data[[target]]) + ) + scale_fill_viridis_d(end = 0.8, alpha = 0.8) + scale_color_viridis_d(end = 0.8) + theme + @@ -67,11 +75,27 @@ autoplot.TaskClassif = function(object, type = "target", theme = theme_minimal() "pairs" = { require_namespaces("GGally") - GGally::ggpairs(object, + GGally::ggpairs( + object, mapping = aes(color = .data[[target]]), - upper = list(continuous = "cor", combo = GGally::wrap("box_no_facet", color = "#000000", linewidth = 0.5), discrete = "count", na = "na"), - lower = list(continuous = GGally::wrap("points", size = 3, alpha = 0.8) , combo = GGally::wrap("facethist", color = "#000000", linewidth = 0.5), discrete = GGally::wrap("facetbar", color = "#000000", linewidth = 0.5), na = "na"), - diag = list(continuous = GGally::wrap("densityDiag", color = "#000000", linewidth = 0.5), discrete = GGally::wrap("barDiag", color = "#000000", linewidth = 0.5), na = "naDiag")) + + upper = list( + continuous = "cor", + combo = GGally::wrap("box_no_facet", color = "#000000", linewidth = 0.5), + discrete = "count", + na = "na" + ), + lower = list( + continuous = GGally::wrap("points", size = 3, alpha = 0.8), + combo = GGally::wrap("facethist", color = "#000000", linewidth = 
0.5), + discrete = GGally::wrap("facetbar", color = "#000000", linewidth = 0.5), + na = "na" + ), + diag = list( + continuous = GGally::wrap("densityDiag", color = "#000000", linewidth = 0.5), + discrete = GGally::wrap("barDiag", color = "#000000", linewidth = 0.5), + na = "naDiag" + ) + ) + scale_fill_viridis_d(end = 0.8, alpha = 0.8) + scale_color_viridis_d(end = 0.8, alpha = 0.8) + theme diff --git a/R/TaskClust.R b/R/TaskClust.R index 174798d5..3a2c19f6 100644 --- a/R/TaskClust.R +++ b/R/TaskClust.R @@ -23,20 +23,38 @@ #' head(fortify(task)) #' autoplot(task) #' } -autoplot.TaskClust = function(object, type = "pairs", theme = theme_minimal(), ...) { # nolint +#nolint next +autoplot.TaskClust = function(object, type = "pairs", theme = theme_minimal(), ...) { assert_choice(type, choices = c("pairs"), null.ok = FALSE) - switch(type, + switch( + type, "pairs" = { require_namespaces("GGally") color = viridis::viridis(1, begin = 0.5) alpha = 0.8 - GGally::ggpairs(object, - upper = list(continuous = "cor", combo = GGally::wrap("box_no_facet", fill = color, alpha = alpha, color = "#000000", linewidth = 0.5), discrete = "count", na = "na"), - lower = list(continuous = GGally::wrap("points", color = color, alpha = alpha), combo = GGally::wrap("facethist", fill = color, alpha = alpha, color = "#000000", linewidth = 0.5), discrete = GGally::wrap("facetbar", fill = color, alpha = alpha, color = "#000000", linewidth = 0.5), na = "na"), - diag = list(continuous = GGally::wrap("densityDiag", color = color), discrete = GGally::wrap("barDiag", fill = color, alpha = alpha, color = "#000000", linewidth = 0.5), na = "naDiag")) + + GGally::ggpairs( + object, + upper = list( + continuous = "cor", + combo = GGally::wrap("box_no_facet", fill = color, alpha = alpha, color = "#000000", linewidth = 0.5), + discrete = "count", + na = "na" + ), + lower = list( + continuous = GGally::wrap("points", color = color, alpha = alpha), + combo = GGally::wrap("facethist", fill = color, alpha = 
alpha, color = "#000000", linewidth = 0.5), + discrete = GGally::wrap("facetbar", fill = color, alpha = alpha, color = "#000000", linewidth = 0.5), + na = "na" + ), + diag = list( + continuous = GGally::wrap("densityDiag", color = color), + discrete = GGally::wrap("barDiag", fill = color, alpha = alpha, color = "#000000", linewidth = 0.5), + na = "naDiag" + ) + ) + theme }, diff --git a/R/TaskRegr.R b/R/TaskRegr.R index 82efefde..2a3fae21 100644 --- a/R/TaskRegr.R +++ b/R/TaskRegr.R @@ -26,24 +26,31 @@ #' autoplot(task) #' autoplot(task, type = "pairs") #' } -autoplot.TaskRegr = function(object, type = "target", theme = theme_minimal(), ...) { # nolint +#nolint next +autoplot.TaskRegr = function(object, type = "target", theme = theme_minimal(), ...) { assert_choice(type, choices = c("target", "pairs"), null.ok = FALSE) - switch(type, + switch( + type, "target" = { - ggplot(data = object, - mapping = aes( - y = .data[[object$target_names]])) + - geom_boxplot( - fill = viridis::viridis(1, begin = 0.5), - alpha = 0.8, - color = "#000000", - linewidth = 0.5) + + ggplot( + data = object, + mapping = aes( + y = .data[[object$target_names]] + ) + ) + + geom_boxplot( + fill = viridis::viridis(1, begin = 0.5), + alpha = 0.8, + color = "#000000", + linewidth = 0.5 + ) + scale_x_discrete() + theme + theme( axis.text.x.bottom = element_blank(), - axis.title.x.bottom = element_blank()) + axis.title.x.bottom = element_blank() + ) }, "pairs" = { @@ -52,10 +59,26 @@ autoplot.TaskRegr = function(object, type = "target", theme = theme_minimal(), . 
color = viridis::viridis(1, begin = 0.5) alpha = 0.8 - GGally::ggpairs(object, - upper = list(continuous = "cor", combo = GGally::wrap("box_no_facet", fill = color, alpha = alpha, color = "#000000", linewidth = 0.5), discrete = "count", na = "na"), - lower = list(continuous = GGally::wrap("points", color = color, alpha = alpha), combo = GGally::wrap("facethist", fill = color, alpha = alpha, color = "#000000", linewidth = 0.5), discrete = GGally::wrap("facetbar", fill = color, alpha = alpha, color = "#000000", linewidth = 0.5), na = "na"), - diag = list(continuous = GGally::wrap("densityDiag", color = color), discrete = GGally::wrap("barDiag", fill = color, alpha = alpha, color = "#000000", linewidth = 0.5), na = "naDiag")) + + GGally::ggpairs( + object, + upper = list( + continuous = "cor", + combo = GGally::wrap("box_no_facet", fill = color, alpha = alpha, color = "#000000", linewidth = 0.5), + discrete = "count", + na = "na" + ), + lower = list( + continuous = GGally::wrap("points", color = color, alpha = alpha), + combo = GGally::wrap("facethist", fill = color, alpha = alpha, color = "#000000", linewidth = 0.5), + discrete = GGally::wrap("facetbar", fill = color, alpha = alpha, color = "#000000", linewidth = 0.5), + na = "na" + ), + diag = list( + continuous = GGally::wrap("densityDiag", color = color), + discrete = GGally::wrap("barDiag", fill = color, alpha = alpha, color = "#000000", linewidth = 0.5), + na = "naDiag" + ) + ) + theme }, diff --git a/R/TuningInstanceBatchSingleCrit.R b/R/TuningInstanceBatchSingleCrit.R index 6070c00a..bf5877a1 100644 --- a/R/TuningInstanceBatchSingleCrit.R +++ b/R/TuningInstanceBatchSingleCrit.R @@ -72,6 +72,24 @@ #' # plot pairs #' autoplot(instance, type = "pairs") #' } -autoplot.TuningInstanceBatchSingleCrit = function(object, type = "marginal", cols_x = NULL, trafo = FALSE, learner = mlr3::lrn("regr.ranger"), grid_resolution = 100, theme = theme_minimal(), ...) 
{ - autoplot.OptimInstanceBatchSingleCrit(object = object, type = type, cols_x = cols_x, trafo = trafo, learner = learner, grid_resolution = grid_resolution, theme = theme, ...) +autoplot.TuningInstanceBatchSingleCrit = function( + object, + type = "marginal", + cols_x = NULL, + trafo = FALSE, + learner = mlr3::lrn("regr.ranger"), + grid_resolution = 100, + theme = theme_minimal(), + ... +) { + autoplot.OptimInstanceBatchSingleCrit( + object = object, + type = type, + cols_x = cols_x, + trafo = trafo, + learner = learner, + grid_resolution = grid_resolution, + theme = theme, + ... + ) } diff --git a/R/as_precrec.R b/R/as_precrec.R index eb0c6b56..afe24c4a 100644 --- a/R/as_precrec.R +++ b/R/as_precrec.R @@ -32,7 +32,8 @@ roc_data = function(prediction) { #' @rdname as_precrec #' @export -as_precrec.PredictionClassif = function(object) { # nolint +#nolint next +as_precrec.PredictionClassif = function(object) { require_namespaces("precrec") data = roc_data(object) precrec::mmdata( @@ -46,7 +47,8 @@ as_precrec.PredictionClassif = function(object) { # nolint #' @rdname as_precrec #' @export -as_precrec.ResampleResult = function(object) { # nolint +#nolint next +as_precrec.ResampleResult = function(object) { require_namespaces("precrec") predictions = object$predictions() data = transpose_list(map(predictions, roc_data)) @@ -62,7 +64,8 @@ as_precrec.ResampleResult = function(object) { # nolint #' @rdname as_precrec #' @export -as_precrec.BenchmarkResult = function(object) { # nolint +#nolint next +as_precrec.BenchmarkResult = function(object) { require_namespaces("precrec") scores = object$score(measures = list()) diff --git a/R/helper.R b/R/helper.R index afe74e50..8cf204c1 100644 --- a/R/helper.R +++ b/R/helper.R @@ -19,7 +19,12 @@ plot_precrec = function(object, curvetype = "ROC", cb_alpha = 0.05, show_cb = TR cb_alpha = NULL } - suppressWarnings(autoplot(precrec::evalmod(x, calc_avg = calc_avg, cb_alpha = cb_alpha), curvetype = curvetype, show_cb = show_cb, ...)) + 
suppressWarnings(autoplot( + precrec::evalmod(x, calc_avg = calc_avg, cb_alpha = cb_alpha), + curvetype = curvetype, + show_cb = show_cb, + ... + )) } delayed_patchwork = function(li, ...) { diff --git a/R/plot_learner_prediction.R b/R/plot_learner_prediction.R index 56fd4f9f..0aac7844 100644 --- a/R/plot_learner_prediction.R +++ b/R/plot_learner_prediction.R @@ -3,12 +3,14 @@ #' @description #' Visualizations for the [mlr3::Prediction] of a single [mlr3::Learner] on a single [mlr3::Task]. #' -#' * For classification we support tasks with exactly two features and learners with `predict_type` set to `"response"` or `"prob"`. +#' * For classification we support tasks with exactly two features and learners with `predict_type` set to +#' `"response"` or `"prob"`. #' * For regression we support tasks with one or two features. #' For tasks with one feature we print confidence bounds if the predict type of the learner was set to `"se"`. #' For tasks with two features the predict type will be ignored. #' -#' Note that this function is a wrapper around [autoplot.ResampleResult()] for a temporary [mlr3::ResampleResult] using [mlr3::mlr_resamplings_holdout] with ratio 1 (all observations in the training set). +#' Note that this function is a wrapper around [autoplot.ResampleResult()] for a temporary [mlr3::ResampleResult] +#' using [mlr3::mlr_resamplings_holdout] with ratio 1 (all observations in the training set). #' #' @param learner ([mlr3::Learner]). #' @param task ([mlr3::Task]). 
diff --git a/air.toml b/air.toml new file mode 100644 index 00000000..d133d6f9 --- /dev/null +++ b/air.toml @@ -0,0 +1,11 @@ +[format] +line-width = 120 # same as .editorconfig +indent-width = 2 # same as .editorconfig +indent-style = "space" # same as .editorconfig +line-ending = "lf" # same as .editorconfig +persistent-line-breaks = true # preserve existing line breaks when reformatting +default-exclude = true # use built-in default exclusions + + +# you can use "exclude" and "skip" to avoid formatting certain globs +exclude = ["R/bibentries.R"] diff --git a/cspell.json b/cspell.json new file mode 100644 index 00000000..f952ba28 --- /dev/null +++ b/cspell.json @@ -0,0 +1,44 @@ +{ + "version": "0.2", + "language": "en-US", // we always use American English + // whitelist: disable all file types, then re-enable only what we want + "enableFiletypes": ["!*", "markdown", "plaintext", "r", "qmd"], + "useGitignore": true, // respect .gitignore (avoids checking generated/ignored files) + // additional paths to ignore beyond .gitignore + "ignorePaths": [ + "*.Rd", // generated roxygen docs + "*.rda", // R binary data + "*.Rds", // R binary data + "*.lock", // lockfiles + "man/", // generated man pages + "renv/" // renv library + ], + // use cspell R-dictionary + project-specific word list + "dictionaries": ["r", "project-words"], + "dictionaryDefinitions": [ + { + "name": "project-words", + // shared word list — commit this file so the whole team benefits + "path": "./.cspell/project-words.txt", + // enables "Add to project dictionary" command in VS Code / Cursor + "addWords": true + } + ], + "languageSettings": [ + { + "languageId": "r", + // only spell-check comments in R files, not code identifiers + "includeRegExpList": [ + "/#.*$/gm" + ] + }, + { + "languageId": "c,cpp", + // only spell-check comments in C/C++ files, not code identifiers + "includeRegExpList": [ + "//.*", // single-line comments: // ... + "/\\*[\\s\\S]*?\\*/" // block comments: /* ... 
*/ + ] + } + ] +} diff --git a/tests/testthat/setup.R b/tests/testthat/setup.R index 25172363..223d3f96 100644 --- a/tests/testthat/setup.R +++ b/tests/testthat/setup.R @@ -6,4 +6,3 @@ if (requireNamespace("lgr")) { old_dt_threads = data.table::getDTthreads() setDTthreads(1) - diff --git a/tests/testthat/test_BenchmarkResult.R b/tests/testthat/test_BenchmarkResult.R index c13f38b8..d122f882 100644 --- a/tests/testthat/test_BenchmarkResult.R +++ b/tests/testthat/test_BenchmarkResult.R @@ -9,9 +9,16 @@ bmr = mlr3::benchmark(mlr3::benchmark_grid(tasks, learner, resampling)) test_that("fortify BenchmarkResult", { f = fortify(bmr, measure = msr("classif.ce")) expect_data_table(f, nrows = 18, ncols = 5) - expect_names(names(f), permutation.of = c( - "nr", "task_id", "learner_id", - "resampling_id", "classif.ce")) + expect_names( + names(f), + permutation.of = c( + "nr", + "task_id", + "learner_id", + "resampling_id", + "classif.ce" + ) + ) }) test_that("autoplot BenchmarkResult", { @@ -38,8 +45,7 @@ test_that("holdout roc plot (#54)", { tasks = tsks("german_credit") learners = c("classif.featureless", "classif.rpart") - learners = lapply(learners, lrn, - predict_type = "prob") + learners = lapply(learners, lrn, predict_type = "prob") resamplings = rsmp("holdout", ratio = .8) # holdout instead of cv @@ -55,14 +61,16 @@ skip_if_not_installed("mlr3inferr") skip_if_not_installed("rpart") test_that("CI plot", { - bmr = benchmark(benchmark_grid(tsks(c("mtcars", "mtcars")), - lrns(c("regr.featureless", "regr.rpart")), rsmp("holdout"))) + bmr = benchmark(benchmark_grid( + tsks(c("mtcars", "mtcars")), + lrns(c("regr.featureless", "regr.rpart")), + rsmp("holdout") + )) p = autoplot(bmr, "ci", msr("ci", "regr.mse")) expect_true(is_ggplot(p)) expect_doppelganger("bmr_holdout_ci", p) - bmr = benchmark(benchmark_grid(tsk("iris"), lrn("classif.rpart"), - rsmps(c("holdout", "cv")))) + bmr = benchmark(benchmark_grid(tsk("iris"), lrn("classif.rpart"), rsmps(c("holdout", "cv")))) 
expect_error(autoplot(bmr, "ci", msr("ci", "classif.acc")), "one resampling method") }) diff --git a/tests/testthat/test_EnsembleFSResult.R b/tests/testthat/test_EnsembleFSResult.R index 854cd2e5..9033ec36 100644 --- a/tests/testthat/test_EnsembleFSResult.R +++ b/tests/testthat/test_EnsembleFSResult.R @@ -17,12 +17,16 @@ test_that("autoplot ResampleResult", { c("V11", "V18", "V9", "V2"), c("V2"), c("V4", "V12"), - c("V6", "V15", "V19", "V7")), + c("V6", "V15", "V19", "V7") + ), classif.ce = c(0.13, 0.24, 0.16, 0.11, 0.25, 0.18, 0.15, 0.1, 0.16) ) - efsr = mlr3fselect::EnsembleFSResult$new(result = result, features = paste0("V", 1:20), - measure = mlr3::msr("classif.ce")) + efsr = mlr3fselect::EnsembleFSResult$new( + result = result, + features = paste0("V", 1:20), + measure = mlr3::msr("classif.ce") + ) # wrong type gives hint of types a user can input expect_error(autoplot(efsr, type = "XYZ"), regexp = "Must be element of set") diff --git a/tests/testthat/test_LearnerClassifCVGlmnet.R b/tests/testthat/test_LearnerClassifCVGlmnet.R index c88206ae..1bd666a3 100644 --- a/tests/testthat/test_LearnerClassifCVGlmnet.R +++ b/tests/testthat/test_LearnerClassifCVGlmnet.R @@ -8,7 +8,9 @@ set.seed(42) test_that("autoplot.LearnerClassifCVGlmnet", { requireNamespace("mlr3learners") learner = mlr3::lrn("classif.cv_glmnet")$train(mlr3::tsk("wine")) - suppressWarnings({p = autoplot(learner, type = "ggfortify")}) + suppressWarnings({ + p = autoplot(learner, type = "ggfortify") + }) expect_true(is_ggplot(p)) expect_doppelganger("learner_classif.cv_glmnet", p) }) diff --git a/tests/testthat/test_LearnerRegr.R b/tests/testthat/test_LearnerRegr.R index 73c6b8cc..6b60b9ca 100644 --- a/tests/testthat/test_LearnerRegr.R +++ b/tests/testthat/test_LearnerRegr.R @@ -29,4 +29,3 @@ test_that("autoplot.PredictionClassif decision boundary 2D", { expect_true(is_ggplot(p)) expect_doppelganger("learner_regression_2D_se", p) }) - diff --git a/tests/testthat/test_OptimInstanceSingleCrit.R 
b/tests/testthat/test_OptimInstanceSingleCrit.R index ce4e13a4..3b133c42 100644 --- a/tests/testthat/test_OptimInstanceSingleCrit.R +++ b/tests/testthat/test_OptimInstanceSingleCrit.R @@ -1,7 +1,8 @@ skip_if_not_installed("mlr3") skip_if_not_installed("bbotk") skip_if_not_installed("patchwork") -library("bbotk") # nolint +#nolint next +library("bbotk") requireNamespace("mlr3learners") set.seed(42) diff --git a/tests/testthat/test_PredictionClust.R b/tests/testthat/test_PredictionClust.R index 22dc0ab9..45a501fa 100644 --- a/tests/testthat/test_PredictionClust.R +++ b/tests/testthat/test_PredictionClust.R @@ -15,7 +15,9 @@ test_that("autoplot.PredictionClust", { expect_true(is_ggplot(p)) expect_doppelganger("predictionclust_scatter", p) - suppressWarnings({p = autoplot(prediction, task, type = "sil")}) + suppressWarnings({ + p = autoplot(prediction, task, type = "sil") + }) expect_true(is_ggplot(p)) expect_doppelganger("predictionclust_sil", p) diff --git a/tests/testthat/test_ResampleResult.R b/tests/testthat/test_ResampleResult.R index 696adf53..9a6c84b3 100644 --- a/tests/testthat/test_ResampleResult.R +++ b/tests/testthat/test_ResampleResult.R @@ -9,9 +9,14 @@ rr = mlr3::resample(task, learner, resampling) test_that("fortify ResampleResult", { f = fortify(rr, measure = msr("classif.ce")) expect_data_table(f, nrows = 10, ncols = 3) - expect_names(names(f), identical.to = c( - "iteration", "measure_id", - "performance")) + expect_names( + names(f), + identical.to = c( + "iteration", + "measure_id", + "performance" + ) + ) }) test_that("autoplot ResampleResult", { @@ -68,19 +73,21 @@ test_that("autoplot ResampleResult type=prediction", { } # check errors - rr = resample(mlr3::tsk("iris")$select(c("Sepal.Length", "Sepal.Width")), - lrn("classif.featureless"), resampling, - store_models = FALSE) + rr = resample( + mlr3::tsk("iris")$select(c("Sepal.Length", "Sepal.Width")), + lrn("classif.featureless"), + resampling, + store_models = FALSE + ) 
expect_error(autoplot(rr, type = "prediction"), regexp = "store_models") - rr = resample(mlr3::tsk("iris"), lrn("classif.featureless"), resampling, - store_models = TRUE) - expect_error(autoplot(rr, type = "prediction"), - regexp = "only works for tasks with two features") - rr = resample(mlr3::tsk("mtcars"), lrn("regr.featureless"), resampling, - store_models = TRUE) - expect_error(autoplot(rr, type = "prediction"), + rr = resample(mlr3::tsk("iris"), lrn("classif.featureless"), resampling, store_models = TRUE) + expect_error(autoplot(rr, type = "prediction"), regexp = "only works for tasks with two features") + rr = resample(mlr3::tsk("mtcars"), lrn("regr.featureless"), resampling, store_models = TRUE) + expect_error( + autoplot(rr, type = "prediction"), regexp = "Plot learner prediction only works with one or two features for -regression!") +regression!" + ) }) diff --git a/tests/testthat/test_TuningInstanceSingleCrit.R b/tests/testthat/test_TuningInstanceSingleCrit.R index 61b2f80a..545fe80a 100644 --- a/tests/testthat/test_TuningInstanceSingleCrit.R +++ b/tests/testthat/test_TuningInstanceSingleCrit.R @@ -18,7 +18,8 @@ instance = TuningInstanceBatchSingleCrit$new( learner = learner, resampling = mlr3::rsmp("cv", folds = 3), measure = mlr3::msr("classif.ce"), - terminator = trm("evals", n_evals = 100)) + terminator = trm("evals", n_evals = 100) +) tuner = tnr("random_search", batch_size = 10) invoke(tuner$optimize, instance, .seed = 123)