Skip to content

Add support for clock functions #627

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 8 commits into
base: main
Choose a base branch
from
47 changes: 41 additions & 6 deletions R/dplyr.R
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@

sql <- dbplyr::sql_query_fields(src$con, from)
dataset <- if (!is.null(src$con@dataset)) as_bq_dataset(src$con)
schema <- bq_perform_query_schema(sql,
schema <- bq_perform_query_schema(sql,

Check warning on line 53 in R/dplyr.R

View check run for this annotation

Codecov / codecov/patch

R/dplyr.R#L53

Added line #L53 was not covered by tests
billing = src$con@billing,
default_dataset = dataset
)
Expand Down Expand Up @@ -129,19 +129,19 @@
# registered onLoad

#' Collect a BigQuery table
#'
#'
#' This collect method is specialised for BigQuery tables, generating the
#' SQL from your dplyr commands, then calling [bq_project_query()]
#' or [bq_dataset_query()] to run the query, then [bq_table_download()]
#' or [bq_dataset_query()] to run the query, then [bq_table_download()]
#' to download the results. Thus the arguments are a combination of the
#' arguments to [dplyr::collect()], `bq_project_query()`/`bq_dataset_query()`,
#' and `bq_table_download()`.
#'
#'
#' @inheritParams dplyr::collect
#' @inheritParams bq_table_download
#' @param n Maximum number of results to retrieve.
#' @param n Maximum number of results to retrieve.
#' The default, `Inf`, will retrieve all rows.
#' @param ... Other arguments passed on to
#' @param ... Other arguments passed on to
#' `bq_project_query()`/`bq_dataset_query()`
collect.tbl_BigQueryConnection <- function(x, ...,
n = Inf,
Expand Down Expand Up @@ -333,6 +333,41 @@
RAND <- NULL # quiet R CMD check
dbplyr::sql_runif(RAND(), n = {{ n }}, min = min, max = max)
},
# clock functions
# Translation for add_days(): casts `x` to DATE and adds an interval of `n`
# days with DATE_ADD(). `...` is passed through check_dots_empty().
# NOTE(review): the generated SQL has no space between the operand and `AS`
# (e.g. CAST(`x`AS DATE)), unlike the DATE_DIFF translations in this
# translator. The unit tests pin this exact text, so the literal and the test
# expectation would need to change together.
add_days = function(x, n, ...) {
  check_dots_empty()
  dbplyr::build_sql(
    "DATE_ADD(CAST(", x, "AS DATE), INTERVAL ", n, " DAY)"
  )
},
# Translation for add_years(): casts `x` to DATE and adds an interval of `n`
# years with DATE_ADD(). `...` is passed through check_dots_empty().
# NOTE(review): as with add_days, the generated SQL has no space before `AS`
# (e.g. CAST(`x`AS DATE)); the unit tests pin this exact text.
add_years = function(x, n, ...) {
  check_dots_empty()
  dbplyr::build_sql(
    "DATE_ADD(CAST(", x, "AS DATE), INTERVAL ", n, " YEAR)"
  )
},
# Translation for date_build(): emits DATE(year, month, day). `month` and
# `day` default to 1L. `invalid` is run through dbplyr's unsupported-argument
# check with NULL allowed, and `...` is passed through check_dots_empty().
date_build = function(year, month = 1L, day = 1L, ..., invalid = NULL) {
  check_dots_empty()
  dbplyr:::check_unsupported_arg(invalid, allow_null = TRUE)
  dbplyr::build_sql(
    "DATE(", year, ", ", month, ", ", day, ")"
  )
},
# Translation for date_count_between(): emits DATE_DIFF(end, start, DAY) with
# both operands cast to DATE, i.e. `end` comes first in the generated SQL.
# `precision` is checked against "DAY" and `n` against 1L via dbplyr's
# unsupported-argument check; `...` is passed through check_dots_empty().
# NOTE(review): clock itself appears to spell this precision in lower case
# ("day") -- confirm whether the upper-case value is the intended interface.
date_count_between = function(start, end, precision, ..., n = 1L) {
  check_dots_empty()
  dbplyr:::check_unsupported_arg(precision, allowed = "DAY")
  dbplyr:::check_unsupported_arg(n, allowed = 1L)
  dbplyr::build_sql(
    "DATE_DIFF(CAST(", end, " AS DATE), CAST(", start, " AS DATE), DAY)"
  )
},
# Translation for get_year(): emits EXTRACT(YEAR FROM x).
get_year = function(x) {
  dbplyr::build_sql(
    "EXTRACT(YEAR FROM ",
    x,
    ")"
  )
},
# Translation for get_month(): emits EXTRACT(MONTH FROM x).
get_month = function(x) {
  dbplyr::build_sql(
    "EXTRACT(MONTH FROM ",
    x,
    ")"
  )
},
# Translation for get_day(): emits EXTRACT(DAY FROM x).
get_day = function(x) {
  dbplyr::build_sql(
    "EXTRACT(DAY FROM ",
    x,
    ")"
  )
},
# Translation for difftime(): emits DATE_DIFF(time1, time2, DAY) with both
# operands cast to DATE. `tz` and `units` are run through dbplyr's
# unsupported-argument check; `units` is checked against "DAY".
# NOTE(review): base R's difftime() names this unit "days" -- confirm that
# the upper-case "DAY" default is the intended interface.
difftime = function(time1, time2, tz, units = "DAY") {
  dbplyr:::check_unsupported_arg(tz)
  dbplyr:::check_unsupported_arg(units, allowed = "DAY")
  dbplyr::build_sql(
    "DATE_DIFF(CAST(", time1, " AS DATE), CAST(", time2, " AS DATE), DAY)"
  )
},
),
dbplyr::sql_translator(.parent = dbplyr::base_agg,
n = function() dplyr::sql("count(*)"),
Expand Down
69 changes: 69 additions & 0 deletions tests/testthat/test-dplyr.R
Original file line number Diff line number Diff line change
Expand Up @@ -224,3 +224,72 @@ test_that("can correctly print a lazy query", {
)
)
})

test_that("get clock functions translate to correct sql", {
  skip_if_not_installed("dbplyr")

  # Build the lazy query step by step, then render it with the simulated
  # BigQuery connection.
  lf <- dbplyr::lazy_frame(x = "2008-12-25")
  query <- dplyr::mutate(
    lf,
    year = get_year(x),
    month = get_month(x),
    day = get_day(x)
  )
  built <- dbplyr::sql_build(query, simulate_bigrquery())

  # Element 1 of the select list is `x` itself; the new columns follow.
  expect_equal(built$select[[2]], "EXTRACT(YEAR FROM `x`)")
  expect_equal(built$select[[3]], "EXTRACT(MONTH FROM `x`)")
  expect_equal(built$select[[4]], "EXTRACT(DAY FROM `x`)")
})


test_that("add clock functions translate to correct sql", {
  skip_if_not_installed("dbplyr")

  # Build the lazy query step by step, then render it with the simulated
  # BigQuery connection.
  lf <- dbplyr::lazy_frame(x = "2008-12-25")
  query <- dplyr::mutate(
    lf,
    date1 = add_years(x, 1L),
    date2 = add_days(x, 1L)
  )
  built <- dbplyr::sql_build(query, simulate_bigrquery())

  # The expectations pin the exact generated text, including the missing
  # space before `AS`.
  expect_equal(
    built$select[[2]],
    "DATE_ADD(CAST(`x`AS DATE), INTERVAL 1 YEAR)"
  )
  expect_equal(
    built$select[[3]],
    "DATE_ADD(CAST(`x`AS DATE), INTERVAL 1 DAY)"
  )
})

test_that("date_build clock function translates to correct sql", {
  skip_if_not_installed("dbplyr")

  # Build the lazy query step by step, then render it with the simulated
  # BigQuery connection.
  lf <- dbplyr::lazy_frame(y = "2008", m = "08", d = "1")
  query <- dplyr::mutate(
    lf,
    full_date = date_build(y, m, d),
    full_date2 = date_build(y)
  )
  built <- dbplyr::sql_build(query, simulate_bigrquery())

  # Elements 1-3 of the select list are y, m and d, so the two new columns
  # sit at positions 4 and 5; positions 2 and 3 checked here are `m` and `d`.
  # NOTE(review): confirm these indices target the intended columns.
  expect_equal(built$select[[2]], "DATE(`y`, `m`, `d`)")
  expect_equal(built$select[[3]], "DATE(`y`, 1, 1)")
})

test_that("date_count_between clock function translates to correct sql", {
  skip_if_not_installed("dbplyr")

  # Build the lazy query step by step, then render it with the simulated
  # BigQuery connection.
  lf <- dbplyr::lazy_frame(start = "2008-12-25", end = "2008-12-26")
  query <- dplyr::mutate(
    lf,
    dcb = date_count_between(start, end, "DAY")
  )
  built <- dbplyr::sql_build(query, simulate_bigrquery())

  # NOTE(review): the frame has two input columns, so `dcb` would be the
  # third select element; confirm index 2 targets the intended column.
  expect_equal(
    built$select[[2]],
    "DATE_DIFF(CAST(`end` AS DATE), CAST(`start` AS DATE), DAY)"
  )
})

test_that("difftime clock function translates to correct sql", {
  skip_if_not_installed("dbplyr")

  # Build the lazy query step by step, then render it with the simulated
  # BigQuery connection.
  lf <- dbplyr::lazy_frame(time1 = "2008-12-25", time2 = "2008-12-26")
  query <- dplyr::mutate(
    lf,
    dcb = difftime(time1 = time1, time2 = time2, units = "DAY")
  )
  built <- dbplyr::sql_build(query, simulate_bigrquery())

  # NOTE(review): the frame has two input columns, so `dcb` would be the
  # third select element; confirm index 2 targets the intended column.
  expect_equal(
    built$select[[2]],
    "DATE_DIFF(CAST(`time1` AS DATE), CAST(`time2` AS DATE), DAY)"
  )
})
Loading