Commit a7af33d ("jss-4")
1 parent abd19db commit a7af33d

17 files changed, +229 -1554 lines changed

paper/.Rprofile (4 additions, 0 deletions)

@@ -0,0 +1,4 @@
+# in enroot, we need the /mnt/data/paper heuristics, because .dockerenv does not exist
+if (!(file.exists("/.dockerenv") || file.exists("/mnt/data/paper"))) {
+  source("renv/activate.R")
+}
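The two-path check added in this `.Rprofile` can be mirrored in shell for a quick sanity test. `check_renv` below is a hypothetical helper, not part of the repository; it only reproduces the heuristic (activate renv unless the Docker marker file or the enroot mount exists).

```shell
# Hypothetical mirror of the .Rprofile logic: renv is activated only when
# neither the Docker marker file nor the enroot mount point is present.
check_renv() {
  # $1: path of the Docker marker, $2: path of the enroot mount
  if [ -e "$1" ] || [ -e "$2" ]; then
    echo "container: skip renv"
  else
    echo "host: activate renv"
  fi
}

# same paths the .Rprofile inspects; output depends on where this runs
check_renv /.dockerenv /mnt/data/paper
```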

paper/README.md (41 additions, 26 deletions)

@@ -5,39 +5,38 @@ Note that there is also a brief section on reproducibility in the appendix of the
 ## Computational Environment
 
 In order to reproduce the results, you can either use the provided docker images or recreate the `renv` environment that is described in `paper/renv.lock`.
-To work with the renv environment, go into the `paper` directory, which will bootstrap the environment, and then run:
+To work with the renv environment, go into the `paper` directory and start an interactive R session, which will bootstrap the `renv` package.
+Then, restore the environment by running:
 
 ```r
 renv::restore()
 ```
 
-Afterwards, you need to install torch:
+It will ask you whether you want to proceed with installing the missing packages, which you have to confirm.
+Afterwards, you need to install torch via:
 
-```{r}
+```r
 torch::install_torch()
 ```
 
 We are providing two docker images, one for CPU and one for CUDA GPU that have the same packages from the `renv.lock` file installed.
-The images can be downloaded from Zenodo: https://doi.org/10.5281/zenodo.17130368.
-You can, for example, use the [zenodo_client](https://pypi.org/project/zenodo-client/) library to download the images:
+The images can be downloaded from Zenodo: [https://doi.org/10.5281/zenodo.17130368](https://doi.org/10.5281/zenodo.17130368), either via the web interface or, for example, using `wget` or a similar tool:
 
 ```bash
-# pip install zenodo-client
-export ZENODO_API_TOKEN=<your-token>
-zenodo_client download 17130368 IMAGE_CPU.tar.gz
+# Docker images
+wget https://zenodo.org/records/18466801/files/IMAGE_CPU.tar.gz
+wget https://zenodo.org/records/18466801/files/IMAGE_GPU.tar.gz
 ```
 
-By default, the downloaded files are stored in `~/.data/zenodo`.
-
 At the time of writing, the images are also hosted on dockerhub, but this is not a permanent storage:
-https://hub.docker.com/repository/docker/sebffischer/mlr3torch-jss/general
+[https://hub.docker.com/repository/docker/sebffischer/mlr3torch-jss/general](https://hub.docker.com/repository/docker/sebffischer/mlr3torch-jss/general)
 
 The `Dockerfile`s used to create the images are available in the `./paper/envs` directory.
 
-If you have downloaded the images like shown above, you can load them into Docker, e.g. via the command below (or otherwise adjust the path accordingly).
+After downloading the images from Zenodo, you can register them with docker as follows:
 
 ```bash
-docker load -i ~/.data/zenodo/17130368/v1/IMAGE_CPU.tar.gz
+docker load -i /path/to/IMAGE_CPU.tar.gz
 ```
 
 To start the CPU docker container, run:
@@ -52,11 +51,11 @@ cd /mnt/data/paper
 The CUDA image can be started with the command below, which requires the [nvidia extension](https://docs.nvidia.com/ai-enterprise/deployment/vmware/latest/docker.html).
 
 ```bash
-docker run -it --gpus all --rm -v ../:/mnt/data sebffischer/mlr3torch-jss:gpu
+docker run -it --gpus all --rm -v <parent-dir-to-paper>:/mnt/data sebffischer/mlr3torch-jss:gpu
 cd /mnt/data/paper
 ```
 
-Note that the `.Rprofile` file ensures that when running R programs from the `paper` directory, the renv environment will be used unless the code is run in the docker container, where we are not relying on renv directly.
+Note that the `.Rprofile` file in `paper` ensures that when running R programs from the `paper` directory, the renv environment will be used unless the code is run in the docker container, where we are not relying on renv directly.
 
 ## Running the Benchmark
 
@@ -82,7 +81,19 @@ Also note that it's important to have enough RAM, otherwise the benchmarks will
 However, there are many other factors, such as the exact hardware that make it generally difficult to reproduce the runtime results.
 
 To run the benchmarks locally, ensure that you are in the `paper` directory.
-To run the GPU benchmarks (using the CUDA docker image) on linux, run:
+There are three scripts:
+
+* `paper/benchmark/linux-gpu.R`, which creates the folder `paper/benchmark/registry-linux-gpu`
+* `paper/benchmark/linux-cpu.R`, which creates the folder `paper/benchmark/registry-linux-cpu`
+* `paper/benchmark/linux-gpu-optimizer.R`, which creates the folder `paper/benchmark/registry-linux-gpu-optimizer`
+
+**Important**: If one of the folders already exists and you want to re-run the benchmarks, you need to delete or move the folder, otherwise you will get an error.
+This is to ensure that the benchmark results are not accidentally overwritten.
+
+To run the benchmarks, either start them via Rscript or source them interactively.
+If you source a script interactively and the registry folder already exists, it will ask you whether you want to delete it, which you have to confirm.
+
+Below is the command for the GPU benchmark, which needs to be run within the CUDA docker image.
 
 ```bash
 Rscript benchmark/linux-gpu.R
@@ -100,7 +111,7 @@ To run the benchmark that compares "ignite" with standard optimizers (using the
 Rscript benchmark/linux-gpu-optimizer.R
 ```
 
-The results are stored in:
+The postprocessed results are stored in:
 
 * `paper/benchmark/result-linux-gpu.rds`
 * `paper/benchmark/result-linux-cpu.rds`
@@ -150,21 +161,18 @@ We provide the results of running this in `paper/paper_results`.
 The results in the paper are those from the CPU docker image and they were fully reproducible when we re-ran them on the same machine.
 There were some minor differences in results when re-running the code on a different machine (macOS with M1 CPU vs Linux with Intel CPU).
 
-The file `paper_code.R` contains some very minor differences to the paper we omitted in the paper for brevity.
+The file `paper_code.R` contains some very minor differences from the paper, which we omitted there for brevity.
 It was extracted from the tex manuscript almost fully programmatically but adjusted with the following modifications:
 
 * Time measurements (`Sys.time()`)
 * Deactivate knitr caching
 * Activating caching for `mlr3torch`
 * Changing the `mlr3` logging level to `warn` for cleaner output
-* Saving the ROC plot for postprocessing
+* Processing the ROC plot for better readability and saving it as `roc.png`, as well as printing it
 * Adding a `sessionInfo()` call at the end
 
 We also added some additional comments to make it easier to associate the code with the paper.
 
-The results we obtained via `knitr::spin()` are stored in `paper/paper_results/`
-The ROC plot is postprocessed using the `roc.R` script and we have also provided the resulting `roc.png` from the paper in the `paper/paper_results` directory.
-
 ### Possible Data Unavailability
 
 The code shown in the paper downloads various datasets from standard resources.
@@ -175,18 +183,25 @@ In the unlikely but possible event that these datasets are not available anymore
 
 in the Zenodo data.
 
-If one of the downloads (1) fails, download the `cache.tar.gz` file from zenodo, untar it and put it in the location where the cache is (put the `R` folder of the cache into `/root/.cache/R` and the `torch` folder into `/root/.cache/torch` when using the docker images).
+If one of the downloads (1) fails, do the following (before starting the docker container):
 
-If (2) fails, download `dogs-vs-cats.tar.gz` from Zenodo, untar it and put it into the `paper/data` subdirectory where you are running the `paper_code.R` (so the directory structure is `paper/data/dogs-vs-cats`).
+1. Download the `cache.tar.gz` file, e.g. via:
+```bash
+wget https://zenodo.org/records/18466801/files/cache.tar.gz
+```
+2. Unpack the file using `tar -xzf cache.tar.gz`, which creates a folder named `cache`.
+3. Move this folder into the parent directory of `paper`.
 
-To do this in the Docker image you can, e.g., put the files into the parent directory of the `paper` directory (which will be mounted) and then after starting the container, copy the files into the correct location.
-Assuming the unpacked cache files are in `/mnt/data/cache`, you can copy them into the correct location with:
+After starting the docker container with the correct mount instructions (as shown earlier), run:
 
 ```bash
 cp -r /mnt/data/cache/R/mlr3torch /root/.cache/R
 cp -r /mnt/data/cache/torch /root/.cache/torch
 ```
 
+If (2) fails, download `dogs-vs-cats.tar.gz` from Zenodo, untar it and put it into the `paper/data` subdirectory where you are running the `paper_code.R` (so the directory structure is `paper/data/dogs-vs-cats/`).
+
+
 ### Other errors
 
 When reproducing the results with `knitr` in the docker container, we sometimes encountered issues with the weight downloads for the ResNet-18 model.
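The cache restore described in the README (unpack `cache.tar.gz`, mount it, copy into `/root/.cache`) can be rehearsed with throwaway directories. The sketch below is purely illustrative: the temporary paths and file names stand in for the real mount point and cache contents.

```shell
# Dry run of the cache restore using temporary directories in place of
# /mnt/data/cache and /root/.cache. The layout mirrors the README's cp commands.
src=$(mktemp -d)   # stands in for the unpacked cache at /mnt/data/cache
dst=$(mktemp -d)   # stands in for /root/.cache inside the container

# fake unpacked cache: an R/mlr3torch folder and a torch folder
mkdir -p "$src/R/mlr3torch" "$src/torch"
touch "$src/R/mlr3torch/some-dataset.rds" "$src/torch/some-weights.pt"

# the two copy steps from the README, retargeted at the sandbox
mkdir -p "$dst/R"
cp -r "$src/R/mlr3torch" "$dst/R"
cp -r "$src/torch" "$dst"

ls "$dst/R/mlr3torch" "$dst/torch"
```

After the copies, the cache layout matches what `mlr3torch` and `torch` expect under `/root/.cache` (an `R/mlr3torch` subfolder and a `torch` subfolder).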

paper/benchmark/benchmark.R (55 additions, 1 deletion)

@@ -2,10 +2,28 @@ library(batchtools)
 library(mlr3misc)
 
 setup = function(reg_path, python_path, work_dir) {
+
+  print_setup_info(reg_path, python_path, work_dir)
+
+
+  if (file.exists(reg_path)) {
+    msg <- sprintf("Registry already exists at path %s. Delete the folder to run the benchmark again.", reg_path)
+    if (!interactive()) {
+      stop(msg)
+    }
+    answer <- readline(sprintf("Registry already exists at path %s. Delete it to run the benchmark again? (y/n)", reg_path))
+    if (answer == "y") {
+      unlink(reg_path, recursive = TRUE)
+    } else {
+      stop(msg)
+    }
+  }
+
   reg = makeExperimentRegistry(
     file.dir = reg_path,
     work.dir = work_dir,
-    packages = "checkmate"
+    packages = "checkmate",
+    seed = 123
   )
   reg$cluster.functions = makeClusterFunctionsInteractive()
 
@@ -48,6 +66,7 @@ setup = function(reg_path, python_path, work_dir) {
   )
 
   addAlgorithm("pytorch", fun = function(instance, job, data, jit, ...) {
+    print(instance)
     f = function(..., python_path) {
       library(reticulate)
       x = try(
@@ -68,6 +87,7 @@ setup = function(reg_path, python_path, work_dir) {
   })
 
   addAlgorithm("rtorch", fun = function(instance, job, opt_type, jit, ...) {
+    print(instance)
     assert_choice(opt_type, c("standard", "ignite"))
     if (opt_type == "ignite") {
      instance$optimizer = paste0("ignite_", instance$optimizer)
@@ -77,6 +97,7 @@ setup = function(reg_path, python_path, work_dir) {
   })
 
   addAlgorithm("mlr3torch", fun = function(instance, job, opt_type, jit, ...) {
+    print(instance)
     if (opt_type == "ignite") {
       instance$optimizer = paste0("ignite_", instance$optimizer)
     }
@@ -93,3 +114,36 @@ REPLS = 10L
 EPOCHS = 20L
 N = 2000L
 P = 1000L
+
+print_setup_info = function(reg_path, python_path, work_dir) {
+  cat("Session Info:\n")
+  print(sessionInfo())
+  cat("Library Paths:\n")
+  for (path in .libPaths()) {
+    cat(" -", path, "\n")
+  }
+  cat("Working Directory:", getwd(), "\n")
+
+  cat("Subfolders of working directory:\n")
+  for (folder in list.files(work_dir)) {
+    cat(" -", folder, "\n")
+  }
+
+  # Function arguments
+  cat("--- FUNCTION ARGUMENTS ---\n")
+  cat(" Registry Path:", reg_path, "\n")
+  cat(" Python Path:", python_path, " (", if (file.exists(python_path)) "exists" else "does not exist", ")\n")
+  cat(" Work Directory:", work_dir, "\n\n")
+  cat("Cuda is available:", torch::cuda_is_available(), "\n")
+  out <- try(callr::r(function(python_path) {
+    reticulate::use_python(python_path, required = TRUE)
+    return(reticulate::py_config())
+  }, show = TRUE, args = list(python_path = python_path)), silent = TRUE)
+  if (inherits(out, "try-error")) {
+    cat("Error occurred while calling Python:\n")
+    print(out)
+  } else {
+    cat("Python configuration:\n")
+    print(out)
+  }
+}
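The registry guard added to `setup()` (fail hard when non-interactive, prompt when interactive) follows a common fail-safe pattern. A minimal non-interactive shell analogue, with a made-up registry path rather than anything from the repository, looks like this:

```shell
# Non-interactive analogue of the registry guard: refuse to proceed when the
# registry folder already exists, so earlier results cannot be overwritten.
guard_registry() {
  if [ -e "$1" ]; then
    echo "Registry already exists at path $1. Delete the folder to run the benchmark again." >&2
    return 1
  fi
  mkdir -p "$1"
  echo "created $1"
}

reg=$(mktemp -d)/registry-demo
guard_registry "$reg"                          # first run creates the registry
guard_registry "$reg" || echo "second run refused"
```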

paper/benchmark/linux-cpu.R (1 addition, 4 deletions)

@@ -1,15 +1,12 @@
 library(here)
 
+set.seed(42)
 source(here("benchmark", "benchmark.R"))
 
 # Change this when not running this in the docker image
 # Below is the correct python path for the CPU docker image.
 PYTHON_PATH = "/opt/venv/bin/python"
 
-if (dir.exists(here("benchmark", "registry-linux-gpu"))) {
-  stop("Registry already exists. Delete it to run the benchmark again.")
-}
-
 setup(
   here("benchmark", "registry-linux-cpu"),
   PYTHON_PATH,

paper/benchmark/linux-gpu-optimizer.R (6 additions, 3 deletions)

@@ -2,10 +2,16 @@ library(here)
 
 source(here("benchmark", "benchmark.R"))
 
+set.seed(43)
+
 # Change this when not running this in the docker image
 # Below is the correct python path for the CUDA docker image
 PYTHON_PATH = "/usr/bin/python3"
 
+if (!torch::cuda_is_available()) {
+  stop("Cuda is not available for R-torch, please use the correct docker image.")
+}
+
 problem_design = expand.grid(
   list(
     n = N,
@@ -20,9 +26,6 @@ problem_design = expand.grid(
   stringsAsFactors = FALSE
 )
 
-if (dir.exists(here("benchmark", "registry-linux-gpu-optimizer"))) {
-  stop("Registry already exists. Delete it to run the benchmark again.")
-}
 
 setup(
   here("benchmark", "registry-linux-gpu-optimizer"),

paper/benchmark/linux-gpu.R (6 additions, 4 deletions)

@@ -2,10 +2,16 @@ library(here)
 
 source(here("benchmark", "benchmark.R"))
 
+set.seed(44)
+
 # Change this when not running this in the docker image
 # Below is the correct python path for the CUDA docker image
 PYTHON_PATH = "/usr/bin/python3"
 
+if (!torch::cuda_is_available()) {
+  stop("Cuda is not available for R-torch, please use the correct docker image.")
+}
+
 problem_design = expand.grid(
   list(
     n = N,
@@ -20,10 +26,6 @@ problem_design = expand.grid(
   stringsAsFactors = FALSE
 )
 
-if (dir.exists(here("benchmark", "registry-linux-gpu"))) {
-  stop("Registry already exists. Delete it to run the benchmark again.")
-}
-
 setup(
   here("benchmark", "registry-linux-gpu"),
   PYTHON_PATH,

paper/benchmark/time_rtorch.R (0 additions, 3 deletions)

@@ -1,9 +1,6 @@
 time_rtorch = function(epochs, batch_size, n_layers, latent, n, p, device, jit, seed, optimizer, mlr3torch = FALSE) {
   library(mlr3torch)
   library(torch)
-  mlr3pipelines::po
-  mlr3torch::LearnerTorch
-  mlr3::lrn
   torch_set_num_threads(1)
   torch_manual_seed(seed)
 

paper/extract.R (10 additions, 1 deletion)

@@ -68,7 +68,16 @@ code_lines <- c(
   "options(mlr3torch.cache = TRUE)",
   "lgr::get_logger(\"mlr3\")$set_threshold(\"warn\")",
   code_lines,
-  "saveRDS(plt, \"roc.rds\")",
+  "library(\"ggplot2\")",
+  "plt = plt +",
+  "  theme(",
+  "    axis.text.x = element_text(size = 12),",
+  "    axis.text.y = element_text(size = 12),",
+  "    axis.title.x = element_text(size = 12),",
+  "    axis.title.y = element_text(size = 12)",
+  "  )",
+  "print(plt)",
+  "ggsave(here::here(\"roc.png\"), plt, width = 4, height = 4, dpi = 300)",
   "Sys.time()",
   "sessionInfo()"
 )

paper/paper_code.R (11 additions, 7 deletions)

@@ -5,11 +5,7 @@
 # Some setup code
 Sys.time()
 options(mlr3torch.cache = TRUE)
-lgr::get_logger('mlr3')$set_threshold('warn')
-
-# 2.2 Main dependencies
-
-# mlr3
+lgr::get_logger("mlr3")$set_threshold("warn")
 library("mlr3")
 set.seed(42)
 task <- tsk("mtcars")
@@ -367,7 +363,15 @@ task_subset$filter(subset)
 rr <- resample(task_subset, glrn, rsmp("holdout"))
 plt <- autoplot(rr, type = "roc")
 
-# Save plot so it can be modified later
-saveRDS(plt, "roc.rds")
+library("ggplot2")
+plt = plt +
+  theme(
+    axis.text.x = element_text(size = 12),
+    axis.text.y = element_text(size = 12),
+    axis.title.x = element_text(size = 12),
+    axis.title.y = element_text(size = 12)
+  )
+print(plt)
+ggsave(here::here("roc.png"), plt, width = 4, height = 4, dpi = 300)
 Sys.time()
 sessionInfo()
