Skip to content

Commit 37bbe80

Browse files
authored
refactor: 🚚 rename convert_file() -> convert() (#281)
# Description

Closes #278. Needs a quick review.

## Checklist

- [X] Ran `just run-all`
1 parent 63d7bd7 commit 37bbe80

11 files changed

Lines changed: 49 additions & 49 deletions

File tree

NAMESPACE

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
# Generated by roxygen2: do not edit by hand
22

3-
export(convert_file)
3+
export(convert)
44
export(list_sas_files)
55
export(read_parquet_file)
66
export(read_parquet_partition)

R/convert.R

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -16,11 +16,11 @@
1616
#' @export
1717
#' @examples
1818
#' sas_file <- fs::path_package("fastreg", "extdata", "test.sas7bdat")
19-
#' convert_file(
19+
#' convert(
2020
#' path = sas_file,
2121
#' output_dir = fs::path_temp("path/to/output/file")
2222
#' )
23-
convert_file <- function(
23+
convert <- function(
2424
path,
2525
output_dir,
2626
chunk_size = 10000000L
@@ -69,7 +69,7 @@ convert_file <- function(
6969
#' Read SAS chunk
7070
#'
7171
#' @param skip Number of rows to skip.
72-
#' @inheritParams convert_file
72+
#' @inheritParams convert
7373
#'
7474
#' @returns A tibble.
7575
#'
@@ -86,7 +86,7 @@ read_sas_chunk <- function(path, skip, chunk_size) {
8686
#' Gets the year and register name from the file name in `path` and creates
8787
#' a partition path `{output_dir}/{register_name}/year={year}/`.
8888
#'
89-
#' @inheritParams convert_file
89+
#' @inheritParams convert
9090
#'
9191
#' @returns The partition path.
9292
#'

README.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -67,13 +67,13 @@ pak::pak("dp-next/fastreg")
6767

6868
## Usage
6969

70-
Use `convert_file()` to convert a single SAS file to Parquet in Hive
70+
Use `convert()` to convert a single SAS file to Parquet in Hive
7171
partition format:
7272

7373
``` r
7474
library(fastreg)
7575

76-
convert_file(
76+
convert(
7777
path = "path/to/file.sas7bdat",
7878
output_dir = "path/to/output_dir/"
7979
)

README.qmd

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -67,13 +67,13 @@ pak::pak("dp-next/fastreg")
6767

6868
## Usage
6969

70-
Use `convert_file()` to convert a single SAS file to Parquet in Hive
70+
Use `convert()` to convert a single SAS file to Parquet in Hive
7171
partition format:
7272

7373
```{r, eval = FALSE}
7474
library(fastreg)
7575
76-
convert_file(
76+
convert(
7777
path = "path/to/file.sas7bdat",
7878
output_dir = "path/to/output_dir/"
7979
)

_pkgdown.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ navbar:
1313
reference:
1414
- title: Main functions
1515
- contents:
16-
- starts_with("convert")
16+
- convert
1717
- read_register
1818
- read_parquet
1919
- use_targets_template

inst/template-targets.R

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,7 @@ list(
7979
# directory having been cleaned.
8080
tar_target(
8181
name = parquet_files,
82-
command = convert_file(path = sas_paths, output_dir = output_dir),
82+
command = convert(path = sas_paths, output_dir = output_dir),
8383
pattern = map(sas_paths),
8484
cue = tar_cue(mode = "always")
8585
)
Lines changed: 4 additions & 4 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

tests/testthat/test-convert.R

Lines changed: 14 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -10,11 +10,11 @@ sas_path <- fs::path_temp("sas_bef")
1010
save_as_sas(bef_list, sas_path)
1111
sas_bef <- fs::dir_ls(sas_path)
1212

13-
# Test convert_file() ----------------------------------------------------------
13+
# Test convert() ----------------------------------------------------------
1414

1515
# Setup: Convert single file
1616
single_file_path <- fs::path_temp("parquet_single_file")
17-
single_file_output <- convert_file(
17+
single_file_output <- convert(
1818
path = sas_bef[[1]],
1919
output_dir = single_file_path
2020
)
@@ -25,11 +25,11 @@ data_actual <- arrow::open_dataset(
2525
dplyr::as_tibble()
2626
data_expected <- haven::read_sas(sas_bef[[1]])
2727

28-
test_that("convert_file() returns output_dir", {
28+
test_that("convert() returns output_dir", {
2929
expect_equal(single_file_output, single_file_path)
3030
})
3131

32-
test_that("convert_file() preserves source data and adds expected columns", {
32+
test_that("convert() preserves source data and adds expected columns", {
3333
expect_equal(nrow(data_actual), nrow(data_expected))
3434
expect_identical(
3535
data_actual |> dplyr::select(-c("source_file", "year")),
@@ -48,7 +48,7 @@ test_that("convert_file() preserves source data and adds expected columns", {
4848
)
4949
})
5050

51-
test_that("convert_file() creates parts with expected naming pattern", {
51+
test_that("convert() creates parts with expected naming pattern", {
5252
actual <- fs::path_file(fs::dir_ls(
5353
single_file_path,
5454
recurse = TRUE,
@@ -57,33 +57,33 @@ test_that("convert_file() creates parts with expected naming pattern", {
5757
expect_true(all(stringr::str_detect(actual, "^part-[a-f0-9]{6}\\.parquet$")))
5858
})
5959

60-
test_that("convert_file() errors with incorrect input parameters", {
60+
test_that("convert() errors with incorrect input parameters", {
6161
# Incorrect path type.
6262
expect_error(
63-
convert_file(path = 1, output_dir = single_file_output),
63+
convert(path = 1, output_dir = single_file_output),
6464
regexp = "character"
6565
)
6666
# Path must exist.
6767
expect_error(
68-
convert_file(path = fs::file_temp(), output_dir = single_file_output),
68+
convert(path = fs::file_temp(), output_dir = single_file_output),
6969
regexp = "does not exist"
7070
)
7171
# Incorrect output_dir type.
7272
expect_error(
73-
convert_file(path = sas_bef[[1]], output_dir = 1),
73+
convert(path = sas_bef[[1]], output_dir = 1),
7474
regexp = "string"
7575
)
7676
# output_dir must be scalar.
7777
expect_error(
78-
convert_file(
78+
convert(
7979
path = sas_bef[[1]],
8080
output_dir = rep(single_file_output, times = 2)
8181
),
8282
regexp = "length 1"
8383
)
8484
# Incorrect chunk size (lower than allowed).
8585
expect_error(
86-
convert_file(
86+
convert(
8787
path = sas_bef[[1]],
8888
output_dir = single_file_output,
8989
chunk_size = 10L
@@ -92,7 +92,7 @@ test_that("convert_file() errors with incorrect input parameters", {
9292
)
9393
})
9494

95-
test_that("convert_file() partitions by year based on file name", {
95+
test_that("convert() partitions by year based on file name", {
9696
expected <- fs::path(
9797
single_file_output,
9898
register_name,
@@ -107,12 +107,12 @@ test_that("convert_file() partitions by year based on file name", {
107107
)
108108
})
109109

110-
test_that("convert_file() creates expected n parts when chunk_size < nrow", {
110+
test_that("convert() creates expected n parts when chunk_size < nrow", {
111111
chunks_path <- fs::path_temp("chunks_path")
112112
chunk_size <- 10000L
113113
sas_file <- sas_bef[[1]]
114114

115-
convert_file(
115+
convert(
116116
path = sas_file,
117117
output_dir = chunks_path,
118118
chunk_size = chunk_size

tests/testthat/test-read.R

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ output_dir <- fs::path_temp("output_dir")
88

99
# Convert files.
1010
purrr::walk(sas_bef, \(path) {
11-
convert_file(path, output_dir)
11+
convert(path, output_dir)
1212
})
1313

1414
# Test read_register() ---------------------------------------------------------
@@ -121,7 +121,7 @@ test_that("read_register() reads files with different columns", {
121121

122122
# Convert files.
123123
purrr::walk(sas_diff_cols, \(path) {
124-
convert_file(path, diff_cols_output)
124+
convert(path, diff_cols_output)
125125
})
126126

127127
# Define expected columns.
@@ -152,7 +152,7 @@ test_that("read_register() errors with incompatible schemas", {
152152
incompatible_output <- fs::path_temp("incompatible")
153153
# Convert files.
154154
purrr::walk(sas_incompatible, \(path) {
155-
convert_file(path, incompatible_output)
155+
convert(path, incompatible_output)
156156
})
157157

158158
expect_error(read_register(incompatible_output), "incompatible")

vignettes/design.qmd

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -57,20 +57,20 @@ page.
5757

5858
```{mermaid}
5959
%%| label: fig-flow
60-
%%| fig-cap: "Expected workflow for converting one SAS file using `convert_file()`."
60+
%%| fig-cap: "Expected workflow for converting one SAS file using `convert()`."
6161
%%| fig-alt: "A flowchart showing the expected flow of converting register SAS files to Parquet files."
6262
flowchart TD
6363
identify_paths("Identify register path(s)<br>with list_sas_files(path)")
6464
path[/"path<br>[Character vector]"/]
6565
output_dir[/"output_dir<br>[Character scalar]"/]
6666
chunk_size[/"chunk_size<br>[Integer scalar]"/]
67-
convert_file("convert_file()")
67+
convert("convert()")
6868
output[/"Parquet file(s)<br>written to output_dir"/]
6969
7070
%% Edges
71-
identify_paths -.-> path --> convert_file
72-
output_dir & chunk_size --> convert_file
73-
convert_file --> output
71+
identify_paths -.-> path --> convert
72+
output_dir & chunk_size --> convert
73+
convert --> output
7474
7575
%% Style
7676
style identify_paths fill:#FFFFFF, color:#000000, stroke-dasharray: 5 5
@@ -96,13 +96,13 @@ flowchart TD
9696
```
9797

9898
::: callout-warning
99-
`convert_file()`, the core function behind converting SAS files to
100-
Parquet used within the targets template, creates an Arrow schema with
101-
data types based on the first file chunk. This means that data type
102-
schemas are defined *within* files only. As a result, if there's a drift
103-
in data types across SAS files in the same register, this may not be
104-
identified in the conversion process, but will become evident when
105-
attempting to read the register.
99+
`convert()`, the core function behind converting SAS files to Parquet
100+
used within the targets template, creates an Arrow schema with data
101+
types based on the first file chunk. This means that data type schemas
102+
are defined *within* files only. As a result, if there's a drift in data
103+
types across SAS files in the same register, this may not be identified
104+
in the conversion process, but will become evident when attempting to
105+
read the register.
106106

107107
We use this design to ensure that subsequent chunks follow the same
108108
schema as the first, as we don't want to have different data types

0 commit comments

Comments (0)