Skip to content

Commit 29e34f8

Browse files
lwjohnst86signekbpre-commit-ci[bot]
authored
feat: ✨ helper get_*() for project IDs and directories (#251)
# Description Closes #243, closes #104 Needs a quick review. ## Checklist - [x] Ran `just run-all` --------- Co-authored-by: Signe Kirk Brødbæk <40836345+signekb@users.noreply.github.com> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
1 parent f88c708 commit 29e34f8

5 files changed

Lines changed: 223 additions & 14 deletions

File tree

R/get.R

Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,84 @@
1+
#' Get the project ID from the current working directory path
#'
#' Looks in the current working directory path for a folder whose name
#' consists only of digits and treats that name as the project ID. Errors if
#' the ID that was found is not exactly 6 digits long.
#'
#' @returns A 6-digit character string, or `NA` (together with a warning) if
#'   no project ID is found in the path.
#' @noRd
get_project_id <- function() {
  # The pattern matches the first digits-only folder that is followed by
  # another path component (`/<digits>/`); the slashes are stripped afterwards.
  # NOTE(review): because a trailing slash is required, a working directory
  # that is itself the ID folder (e.g. `E:/701010`) is presumably not matched
  # -- confirm whether that is intended before changing the pattern.
  id <- fs::path_wd() |>
    stringr::str_extract("/[0-9]+/") |>
    stringr::str_remove_all("/")

  if (is.na(id) || id == "") {
    cli::cli_warn(
      c(
        "No project ID could be found in the path of the current working directory, so outputting `NA`.",
        "i" = "Your path is {fs::path_wd()}. Maybe change to a working directory within a project?"
      )
    )
  }

  # `is.na()` is checked first so the length comparison never sees `NA`.
  if (!is.na(id) && stringr::str_length(id) != 6) {
    # The header and the bullet have to be one character vector: extra
    # positional arguments to `cli_abort()` are forwarded to `rlang::abort()`
    # and are not appended to the message.
    cli::cli_abort(
      c(
        "Found an ID, but it was too long or too short to be a project ID.",
        "i" = "The ID found was {id}. Project IDs are expected to be 6 digits long."
      )
    )
  }
  id
}
34+
35+
#' Get the path to the rawdata or workdata directory for the current project
#'
#' The [options()] `fastreg.project_rawdata_dir` and
#' `fastreg.project_workdata_dir` take precedence. When the relevant option is
#' not set, the path is built from the project ID returned by
#' `get_project_id()`: `E:/<project_id>/rawdata/` for raw data and
#' `E:/<project_id>/workdata/` for work data.
#'
#' @returns A path object.
#' @noRd
get_project_rawdata_dir <- function() {
  configured_dir <- getOption("fastreg.project_rawdata_dir")

  if (is.null(configured_dir)) {
    # No option set: fall back to the ID found in the working directory path.
    id <- get_project_id()
    if (is.na(id) || id == "") {
      cli::cli_abort(
        c(
          "Can't set the {.path rawdata/} path without a project ID.",
          "i" = "Use {.code options(fastreg.project_rawdata_dir = '<path>')} or change into a directory within a project."
        )
      )
    }
    fs::path(glue::glue("E:/{id}/rawdata/"))
  } else {
    fs::path(configured_dir)
  }
}
64+
65+
#' @describeIn get_project_rawdata_dir Gets the project workdata directory,
#'   from the `fastreg.project_workdata_dir` option when it is set and
#'   otherwise as `E:/<project_id>/workdata/`.
#' @noRd
get_project_workdata_dir <- function() {
  configured_dir <- getOption("fastreg.project_workdata_dir")
  option_is_set <- !is.null(configured_dir)
  if (option_is_set) {
    return(fs::path(configured_dir))
  }

  # No option set: the path has to be derived from the project ID.
  id <- get_project_id()
  id_is_missing <- is.na(id) || id == ""
  if (id_is_missing) {
    cli::cli_abort(
      c(
        "Can't set the {.path workdata/} path without a project ID.",
        "i" = "Use {.code options(fastreg.project_workdata_dir = '<path>')} or change into a working directory within a project."
      )
    )
  }

  default_dir <- glue::glue("E:/{id}/workdata/")
  fs::path(default_dir)
}

tests/testthat/test-get.R

Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,77 @@
1+
test_that("`get_project_id()` extracts correct project ID", {
  # Working directory nested below a 6-digit folder.
  nested_dir <- fs::path_temp("701010/test/project/")
  fs::dir_create(nested_dir, recurse = TRUE)

  found_id <- withr::with_dir(nested_dir, get_project_id())

  expect_identical(found_id, "701010")
})
12+
13+
test_that("`get_project_id()` errors for IDs not of length 6", {
  # One ID that is too short (5 digits) and one that is too long (7 digits).
  for (bad_id in c("70101", "7010101")) {
    temp_dir <- fs::path_temp(bad_id, "test", "project")
    fs::dir_create(temp_dir, recurse = TRUE)
    withr::with_dir(
      temp_dir,
      expect_error(get_project_id(), regexp = "project ID")
    )
  }
})
32+
33+
test_that("`get_project_id()` warns for not finding a project ID", {
  # No folder in this path has a digits-only name.
  no_id_dir <- fs::path_temp("non-number/test/project/")
  fs::dir_create(no_id_dir, recurse = TRUE)

  withr::with_dir(no_id_dir, {
    expect_warning(get_project_id(), regexp = "`NA`")
    # The warning is checked above, so it is silenced for the value check.
    expect_identical(suppressWarnings(get_project_id()), NA_character_)
  })
})
47+
48+
test_that("should fail if no `E:` drive exists", {
  # NOTE(review): the `get_project_*_dir()` helpers do not appear to check
  # that `E:` exists. The error here presumably comes from no project ID being
  # found when the working directory is the ID folder itself (the matching
  # warning is suppressed below) -- confirm this is what the test should cover.
  project_dir <- fs::path_temp("701010/")
  fs::dir_create(project_dir)

  withr::with_dir(project_dir, {
    expect_error(suppressWarnings(get_project_rawdata_dir()))
    expect_error(suppressWarnings(get_project_workdata_dir()))
  })
})
59+
60+
test_that("takes project directory from options", {
  # Paths supplied through options should be returned unchanged.
  rawdata_dir <- fs::path("E:/rawdata/701010/")
  workdata_dir <- fs::path("E:/workdata/701010/")

  withr::with_options(
    list(
      fastreg.project_rawdata_dir = rawdata_dir,
      fastreg.project_workdata_dir = workdata_dir
    ),
    {
      expect_identical(get_project_rawdata_dir(), rawdata_dir)
      expect_identical(get_project_workdata_dir(), workdata_dir)
    }
  )
})

tests/testthat/test-use-targets.R

Lines changed: 15 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -47,21 +47,21 @@ test_that("targets pipeline template converts SAS files to Parquet", {
4747

4848
# Create temp directory structure.
4949
test_dir <- fs::path_temp("pipeline-test")
50-
input_dir <- fs::path(test_dir, "input")
51-
output_dir <- fs::path(test_dir, "output")
52-
fs::dir_create(input_dir)
53-
fs::dir_create(output_dir)
50+
test_input_dir <- fs::path(test_dir, "input")
51+
test_output_dir <- fs::path(test_dir, "output")
52+
fs::dir_create(test_input_dir)
53+
fs::dir_create(test_output_dir)
5454

5555
# Create SAS files.
5656
bef_list <- simulate_register("bef", c("1999", "2020"))
5757
lmdb_list <- simulate_register("lmdb", c("2020", "2021"))
58-
save_as_sas(bef_list, input_dir)
59-
save_as_sas(lmdb_list, input_dir)
58+
save_as_sas(bef_list, test_input_dir)
59+
save_as_sas(lmdb_list, test_input_dir)
6060

6161
# Read template and replace placeholder paths.
6262
modified_content <- template_content |>
63-
stringr::str_replace("/path/to/sas/directory", input_dir) |>
64-
stringr::str_replace("/path/to/output/directory", output_dir)
63+
stringr::str_replace("/path/to/sas/directory", test_input_dir) |>
64+
stringr::str_replace("/path/to/output/directory", test_output_dir)
6565

6666
# Write and run pipeline.
6767
withr::with_dir(test_dir, {
@@ -70,7 +70,11 @@ test_that("targets pipeline template converts SAS files to Parquet", {
7070
})
7171

7272
# Check number of created Parquet files.
73-
parquet_files <- fs::dir_ls(output_dir, recurse = TRUE, glob = "*.parquet")
73+
parquet_files <- fs::dir_ls(
74+
test_output_dir,
75+
recurse = TRUE,
76+
glob = "*.parquet"
77+
)
7478
expect_equal(
7579
length(parquet_files),
7680
sum(length(bef_list), length(lmdb_list))
@@ -81,13 +85,13 @@ test_that("targets pipeline template converts SAS files to Parquet", {
8185
n_expected_lmdb <- sum(purrr::map_int(lmdb_list, nrow))
8286

8387
n_actual_bef <- arrow::open_dataset(fs::path(
84-
output_dir,
88+
test_output_dir,
8589
"bef"
8690
)) |>
8791
dplyr::collect() |>
8892
nrow()
8993
n_actual_lmdb <- arrow::open_dataset(fs::path(
90-
output_dir,
94+
test_output_dir,
9195
"lmdb"
9296
)) |>
9397
dplyr::collect() |>

vignettes/design.qmd

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,8 @@ The actions are:
3838
- `list`: List files in a directory, e.g., SAS or Parquet files.
3939
- `read`: Read a Parquet register into R as a DuckDB table.
4040
- `use`: Use a template in the current project.
41+
- `get`: Get or guess some information, e.g., the project ID, workdata
42+
directory, or rawdata directory from the current working directory.
4143

4244
While the objects are:
4345

@@ -114,11 +116,10 @@ across chunks of the same partition (e.g. `part-*.parquet`).
114116
%%| fig-cap: "Expected workflow for reading a Parquet register as a DuckDB table using `read_register()`."
115117
%%| fig-alt: "A flowchart showing the expected flow of reading a Parquet register created with the fastreg package."
116118
flowchart TD
117-
path[/"path<br>[Character scalar]"/]
119+
name[/"name<br>[Character scalar]"/]
118120
read_register("read_register()")
119121
output[/"Output<br>[DuckDB table]"/]
120122
121123
%% Edges
122-
path --> read_register --> output
123-
124+
name --> read_register --> output
124125
```

vignettes/fastreg.qmd

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,49 @@ save_as_sas(
7474
show_tree(sas_dir)
7575
```
7676

77+
## Settings to correct paths
78+
79+
Many of fastreg's functions, including the conversion, writing, and
80+
reading functions, depend on the locations of the original SAS files
81+
and the eventual Parquet files. Through `options()` you can set these paths in
82+
two settings: `fastreg.project_rawdata_dir` and
83+
`fastreg.project_workdata_dir`. You can set these `options()` at the top
84+
of your R script or Quarto document, in your R Project's `.Rprofile`, or
85+
in your user-level `.Rprofile`. For example, to set them at the top of an R
86+
script, write (using a temporary directory here for these examples):
87+
88+
```{r options}
89+
options(
90+
# With a fake project ID.
91+
fastreg.project_rawdata_dir = fs::path_temp("rawdata/701010/"),
92+
fastreg.project_workdata_dir = fs::path_temp("workdata/701010/")
93+
)
94+
```
95+
96+
If you want to set those exact same options in the R Project's
97+
`.Rprofile`, run the following line in your Console to open up the
98+
`.Rprofile` file for the project:
99+
100+
```{r options-project-profile}
101+
#| filename: "Console"
102+
#| eval: false
103+
usethis::edit_r_profile("project")
104+
```
105+
106+
You can then add the same `options()` as shown in the R script example
107+
above to that file and save it. The next time you open the project,
108+
those options will be set.
109+
110+
If you want to set these options for all of your R projects and
111+
sessions, you can add them globally in your user-level `.Rprofile`. To
112+
open the `.Rprofile`, run:
113+
114+
```{r options-user-profile}
115+
#| filename: "Console"
116+
#| eval: false
117+
usethis::edit_r_profile("user")
118+
```
119+
77120
## Converting a single file
78121

79122
Converting one file from SAS to Parquet in fastreg isn't a simple change

0 commit comments

Comments
 (0)