Skip to content

Commit 619a36e

Browse files
authored
test: ✅ add tests for when cols and schemas differ (#270)
# Description This adds tests covering the cases where columns and schemas differ between files, so it's clearer to us what happens in these scenarios. Needs a thorough review. ## Checklist - [X] Ran `just run-all`
1 parent 36cd895 commit 619a36e

2 files changed

Lines changed: 65 additions & 2 deletions

File tree

tests/testthat/test-convert.R

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -213,3 +213,19 @@ test_that("convert_register() converts larger files with chunking", {
213213
))
214214
expect_equal(n_actual, n_expected)
215215
})
216+
217+
test_that("convert_register() doesn't error with incompatible schemas", {
218+
# Create a bef file where numeric columns are changed to character, so
219+
# the schema is incompatible with the other bef files.
# NOTE(review): `bef_list` is defined outside this test (not visible here).
220+
incompatible_data <- bef_list[[1]] |>
221+
dplyr::mutate(dplyr::across(where(is.numeric), as.character))
222+
223+
# Save the altered data under the name `bef2099` in its own temp directory
# and append the resulting file path(s) to the existing `sas_bef` paths.
# NOTE(review): `save_as_sas()` and `sas_bef` come from outside this test.
incompatible_sas_path <- fs::path_temp("sas_schema_incompatible")
224+
save_as_sas(list(bef2099 = incompatible_data), incompatible_sas_path)
225+
sas_incompatible <- c(sas_bef, fs::dir_ls(incompatible_sas_path))
226+
227+
# Converting the mixed set of files must complete without raising an error.
expect_no_error(convert_register(
228+
path = sas_incompatible,
229+
output_dir = fs::path_temp("incompatible_schemas")
230+
))
231+
})

tests/testthat/test-read.R

Lines changed: 49 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,9 +29,13 @@ test_that("read_register() reads a single Parquet file", {
2929
expect_equal(
3030
# year col doesn't exist when only one file is read.
3131
actual_data |> dplyr::select(-"source_file"),
32-
expected_data
32+
expected_data,
33+
ignore_attr = TRUE
34+
)
35+
expect_all_equal(
36+
actual_data$source_file,
37+
expected_source_file
3338
)
34-
expect_all_equal(actual_data$source_file, expected_source_file)
3539
})
3640

3741
test_that("read_register() reads a partitioned Parquet register", {
@@ -101,3 +105,46 @@ test_that("files with extension .parq can also be read", {
101105
arrow::write_parquet(simulate_register("bef")[[1]], sink = path)
102106
expect_no_error(read_register(path))
103107
})
108+
109+
110+
test_that("read_register() reads files with different columns", {
111+
# Faux bef with lmdb structure, saved separately and combined with sas_bef.
# The simulated lmdb list is renamed to "bef2021" before it is saved.
112+
lmdb_list <- simulate_register("lmdb", year = c("2021"))
113+
names(lmdb_list) <- "bef2021"
114+
lmdb_sas_path <- fs::path_temp("sas_lmdb_as_bef")
115+
save_as_sas(lmdb_list, lmdb_sas_path)
116+
sas_diff_cols <- c(sas_bef, fs::dir_ls(lmdb_sas_path))
117+
118+
# Convert the combined set of files into a dedicated temp output directory.
diff_cols_output <- fs::path_temp("diff_cols")
119+
convert_register(path = sas_diff_cols, output_dir = diff_cols_output)
120+
121+
# Define expected columns: the unique column names of a simulated bef and a
# simulated lmdb register, plus "source_file" and "year".
122+
expected <- purrr::map(c("bef", "lmdb"), \(x) {
123+
simulate_register(x, n = 1)[[1]]
124+
}) |>
125+
purrr::map(colnames) |>
126+
purrr::list_c() |>
127+
unique() |>
128+
c("source_file", "year")
129+
130+
# Both sides are sorted, so only the set of column names is compared, not
# their order.
expect_identical(
131+
sort(expected),
132+
sort(read_register(diff_cols_output) |> colnames())
133+
)
134+
})
135+
136+
test_that("read_register() errors with incompatible schemas", {
137+
# Create a bef file where numeric columns are changed to character, so
138+
# the schema is incompatible with the other bef files.
# NOTE(review): `bef_list` is defined outside this test (not visible here).
139+
incompatible_data <- bef_list[[1]] |>
140+
dplyr::mutate(dplyr::across(where(is.numeric), as.character))
141+
142+
# NOTE(review): the temp directory name "sas_schema_incompatible" is also
# used by the convert_register() test in test-convert.R; confirm that
# sharing it between test files is intended.
incompatible_sas_path <- fs::path_temp("sas_schema_incompatible")
143+
save_as_sas(list(bef2099 = incompatible_data), incompatible_sas_path)
144+
sas_incompatible <- c(sas_bef, fs::dir_ls(incompatible_sas_path))
145+
146+
# Convert first; the error is expected when reading, not when converting.
incompatible_output <- fs::path_temp("incompatible")
147+
convert_register(path = sas_incompatible, output_dir = incompatible_output)
148+
149+
# Reading must raise an error whose message matches "incompatible".
expect_error(read_register(incompatible_output), "incompatible")
150+
})

0 commit comments

Comments
 (0)