Skip to content

Commit 2b0a8e9

Browse files
committed
break up dst_interpolate_aw into smaller modules with helpers
1 parent c391a3c commit 2b0a8e9

File tree

9 files changed

+838
-251
lines changed

9 files changed

+838
-251
lines changed

R/dst_interpolate_aw.R

Lines changed: 103 additions & 251 deletions
Large diffs are not rendered by default.

R/duckdb_aggregate.R

Lines changed: 102 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,102 @@
1+
#' Compile aggregation view for AW/IW (area/length/count kernels)
2+
#' @keywords internal
3+
ducksf_compile_aggs <- function(
4+
con,
5+
ex_vars = character(0),
6+
in_vars = character(0),
7+
measure = c("area", "length", "count"),
8+
overlap_view = "overlap",
9+
source_proj_view = "source_proj",
10+
denom_sid_view = "total_by_sid",
11+
denom_tid_view = "total_by_tid",
12+
denom_sid_alias = NULL, # defaults to total_<measure>_sid
13+
denom_tid_alias = NULL, # defaults to total_<measure>_tid
14+
out_view = "interpolated_all",
15+
tid_col = "tid" # id on the overlap/target side
16+
) {
17+
measure <- match.arg(measure)
18+
overlap_col <- switch(
19+
measure,
20+
area = "overlap_area",
21+
length = "overlap_length",
22+
count = "overlap_count"
23+
)
24+
denom_sid_alias <- denom_sid_alias %||% paste0("total_", measure, "_sid")
25+
denom_tid_alias <- denom_tid_alias %||% paste0("total_", measure, "_tid")
26+
27+
ov <- DBI::dbQuoteIdentifier(con, overlap_view)
28+
sp <- DBI::dbQuoteIdentifier(con, source_proj_view)
29+
tbs <- DBI::dbQuoteIdentifier(con, denom_sid_view)
30+
tbt <- DBI::dbQuoteIdentifier(con, denom_tid_view)
31+
out <- DBI::dbQuoteIdentifier(con, out_view)
32+
33+
tidq <- DBI::dbQuoteIdentifier(con, tid_col)
34+
ovq <- DBI::dbQuoteIdentifier(con, overlap_col)
35+
den_sid_q <- DBI::dbQuoteIdentifier(con, denom_sid_alias)
36+
den_tid_q <- DBI::dbQuoteIdentifier(con, denom_tid_alias)
37+
38+
# Build SELECT expressions
39+
ex_exprs <- character(0)
40+
if (length(ex_vars)) {
41+
ex_exprs <- paste0(
42+
"SUM((src.",
43+
DBI::dbQuoteIdentifier(con, ex_vars),
44+
") * o.",
45+
ovq,
46+
" / NULLIF(tbs.",
47+
den_sid_q,
48+
", 0)) AS ",
49+
DBI::dbQuoteIdentifier(con, ex_vars)
50+
)
51+
}
52+
53+
in_exprs <- character(0)
54+
if (length(in_vars)) {
55+
in_exprs <- paste0(
56+
"SUM((src.",
57+
DBI::dbQuoteIdentifier(con, in_vars),
58+
") * o.",
59+
ovq,
60+
" / NULLIF(tbt.",
61+
den_tid_q,
62+
", 0)) AS ",
63+
DBI::dbQuoteIdentifier(con, in_vars)
64+
)
65+
}
66+
67+
select_expr <- paste(c(ex_exprs, in_exprs), collapse = ",\n ")
68+
69+
# Conditional joins
70+
join_src <- if (length(c(ex_vars, in_vars)) > 0) {
71+
paste("JOIN", sp, "src USING (sid)")
72+
} else {
73+
""
74+
}
75+
join_sid <- if (length(ex_vars) > 0) {
76+
paste("LEFT JOIN", tbs, "tbs USING (sid)")
77+
} else {
78+
""
79+
}
80+
join_tid <- if (length(in_vars) > 0) {
81+
paste("LEFT JOIN", tbt, "tbt USING (tid)")
82+
} else {
83+
""
84+
}
85+
86+
sql <- glue::glue(
87+
"
88+
CREATE OR REPLACE VIEW {out} AS
89+
SELECT
90+
o.{tidq}
91+
{ if (nzchar(select_expr)) paste0(',\n ', select_expr) else '' }
92+
FROM {ov} o
93+
{join_src}
94+
{join_sid}
95+
{join_tid}
96+
GROUP BY o.{tidq};
97+
"
98+
)
99+
100+
DBI::dbExecute(con, sql)
101+
invisible(out_view)
102+
}

R/duckdb_conn.R

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
#' @keywords internal
2+
ducksf_duck_connect <- function(
3+
threads = NULL,
4+
load_spatial = TRUE,
5+
load_geoarrow = TRUE,
6+
database = ":memory:",
7+
read_only = FALSE
8+
) {
9+
con <- DBI::dbConnect(duckdb::duckdb(database, read_only = read_only))
10+
11+
if (isTRUE(load_spatial)) {
12+
# INSTALL is idempotent; keep it robust across DuckDB versions
13+
try(DBI::dbExecute(con, "INSTALL spatial;"), silent = TRUE)
14+
DBI::dbExecute(con, "LOAD spatial;")
15+
}
16+
17+
if (isTRUE(load_geoarrow)) {
18+
# Available in recent DuckDB builds; ignore if missing
19+
try(
20+
DBI::dbExecute(con, "CALL register_geoarrow_extensions();"),
21+
silent = TRUE
22+
)
23+
}
24+
25+
if (!is.null(threads)) {
26+
threads <- as.integer(threads)
27+
if (!is.na(threads) && threads > 0L) {
28+
try(
29+
DBI::dbExecute(con, paste0("SET threads=", threads, ";")),
30+
silent = TRUE
31+
)
32+
}
33+
}
34+
35+
con
36+
}
37+
38+
#' @keywords internal
39+
ducksf_duck_disconnect <- function(con) {
40+
try(
41+
{
42+
if (inherits(con, "DBIConnection") && DBI::dbIsValid(con)) {
43+
DBI::dbDisconnect(con)
44+
}
45+
},
46+
silent = TRUE
47+
)
48+
invisible(TRUE)
49+
}

R/duckdb_denominators.R

Lines changed: 97 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,97 @@
1+
#' Build denominator table/view for areal/length/count kernels
2+
#' @keywords internal
3+
ducksf_build_denom <- function(
4+
con,
5+
by = c("sid", "tid"),
6+
mode = NULL, # "sum" or "total" (only relevant for by="sid")
7+
measure = c("area", "length", "count"),
8+
overlap_view = "overlap",
9+
src_proj_view = "source_proj",
10+
out_view = NULL, # defaults to "total_by_<by>"
11+
id_col = NULL, # defaults to by
12+
alias = NULL # defaults to "total_<measure>_<by>"
13+
) {
14+
by <- match.arg(by)
15+
measure <- match.arg(measure)
16+
mode <- if (is.null(mode)) {
17+
if (by == "tid") "sum" else "sum"
18+
} else {
19+
match.arg(mode, c("sum", "total"))
20+
}
21+
22+
# defaults for names
23+
out_view <- out_view %||% paste0("total_by_", by)
24+
id_col <- id_col %||% by
25+
alias <- alias %||% paste0("total_", measure, "_", by)
26+
27+
ov <- DBI::dbQuoteIdentifier(con, overlap_view)
28+
sp <- DBI::dbQuoteIdentifier(con, src_proj_view)
29+
out <- DBI::dbQuoteIdentifier(con, out_view)
30+
idq <- DBI::dbQuoteIdentifier(con, id_col)
31+
alia <- DBI::dbQuoteIdentifier(con, alias)
32+
33+
# overlap measure column name
34+
ov_col <- switch(
35+
measure,
36+
area = "overlap_area",
37+
length = "overlap_length",
38+
count = "overlap_count"
39+
)
40+
ovq <- DBI::dbQuoteIdentifier(con, ov_col)
41+
42+
if (by == "tid") {
43+
if (mode == "total") {
44+
stop("mode='total' is not defined for by='tid'. Use mode='sum'.")
45+
}
46+
DBI::dbExecute(
47+
con,
48+
glue::glue(
49+
"
50+
CREATE OR REPLACE VIEW {out} AS
51+
SELECT {idq} AS {idq}, SUM({ovq}) AS {alia}
52+
FROM {ov}
53+
GROUP BY {idq};
54+
"
55+
)
56+
)
57+
} else {
58+
# by == "sid"
59+
if (mode == "sum") {
60+
DBI::dbExecute(
61+
con,
62+
glue::glue(
63+
"
64+
CREATE OR REPLACE VIEW {out} AS
65+
SELECT {idq} AS {idq}, SUM({ovq}) AS {alia}
66+
FROM {ov}
67+
GROUP BY {idq};
68+
"
69+
)
70+
)
71+
} else {
72+
# mode == "total"
73+
if (measure == "count") {
74+
stop("mode='total' is not defined for measure='count'.")
75+
}
76+
geom_expr <- if (measure == "area") "ST_Area(geom)" else "ST_Length(geom)"
77+
DBI::dbExecute(
78+
con,
79+
glue::glue(
80+
"
81+
CREATE OR REPLACE VIEW {out} AS
82+
SELECT {idq} AS {idq}, {geom_expr} AS {alia}
83+
FROM {sp};
84+
"
85+
)
86+
)
87+
}
88+
}
89+
90+
invisible(list(
91+
out_view = out_view,
92+
alias = alias,
93+
by = by,
94+
measure = measure,
95+
mode = mode
96+
))
97+
}

R/duckdb_export.R

Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,83 @@
1+
#' Make sf-ready view (tid + geometry + interpolated cols)
2+
#' @keywords internal
3+
ducksf_build_interpolated_sf_view <- function(
4+
con,
5+
target_tbl = "target_tbl",
6+
ia_view = "interpolated_all",
7+
out_view = "interpolated_sf"
8+
) {
9+
tt <- DBI::dbQuoteIdentifier(con, target_tbl)
10+
ia <- DBI::dbQuoteIdentifier(con, ia_view)
11+
out <- DBI::dbQuoteIdentifier(con, out_view)
12+
DBI::dbExecute(
13+
con,
14+
glue::glue(
15+
"
16+
CREATE OR REPLACE VIEW {out} AS
17+
SELECT t.tid, t.geom AS geometry, ia.*
18+
FROM {tt} t
19+
LEFT JOIN {ia} ia USING (tid);
20+
"
21+
)
22+
)
23+
invisible(out_view)
24+
}
25+
26+
#' Collect result as sf or tibble, mirroring current behavior
27+
#' @keywords internal
28+
ducksf_collect_output <- function(
29+
con,
30+
output = c("sf", "tibble"),
31+
target_sf,
32+
target_crs,
33+
tidQN,
34+
nameConflict = FALSE,
35+
tidOrig = NULL,
36+
ia_view = "interpolated_all",
37+
isf_view = "interpolated_sf"
38+
) {
39+
output <- match.arg(output)
40+
if (output == "sf") {
41+
x <- dplyr::tbl(con, isf_view) |> arrow::to_arrow()
42+
tmp <- sf::st_as_sf(x, crs = target_crs)
43+
out <- dplyr::left_join(
44+
target_sf,
45+
dplyr::as_tibble(tmp),
46+
by = dplyr::join_by(!!tidQN == "tid")
47+
)
48+
} else {
49+
x <- dplyr::tbl(con, ia_view) |> arrow::to_arrow()
50+
out <- dplyr::as_tibble(x)
51+
tgt_id <- dplyr::select(
52+
sf::st_drop_geometry(target_sf),
53+
!!rlang::sym(tidQN)
54+
)
55+
out <- dplyr::left_join(tgt_id, out, by = dplyr::join_by(!!tidQN == "tid"))
56+
}
57+
if (isTRUE(nameConflict) && !is.null(tidOrig)) {
58+
out <- dplyr::rename(out, !!rlang::set_names(rlang::sym(tidQN), tidOrig))
59+
}
60+
out
61+
}
62+
63+
#' Apply keep_NA semantics (drop targets with all-NA on requested vars)
64+
#' @keywords internal
65+
ducksf_apply_keep_na <- function(
66+
out,
67+
output = c("sf", "tibble"),
68+
vars_used,
69+
keep_NA
70+
) {
71+
output <- match.arg(output)
72+
if (isTRUE(keep_NA) || !length(vars_used)) {
73+
return(out)
74+
}
75+
if (output == "sf") {
76+
df <- sf::st_drop_geometry(out)
77+
keep_t <- rowSums(!is.na(df[, vars_used, drop = FALSE])) > 0
78+
out[keep_t, , drop = FALSE]
79+
} else {
80+
keep_t <- rowSums(!is.na(out[, vars_used, drop = FALSE])) > 0
81+
out[keep_t, , drop = FALSE]
82+
}
83+
}

0 commit comments

Comments
 (0)