@@ -460,22 +460,22 @@ sd_ungroup <- function(.data) {
460460# ' @param ... Aggregate expressions. These are evaluated in the same way as
461461# ' [dplyr::summarise()] except the outer expression must be an aggregate
462462# ' expression (e.g., `sum(x) + 1` is not currently possible).
463+ # ' @param .env The calling environment for programmatic usage
463464# '
464465# ' @returns An object of class sedonadb_dataframe
465466# ' @export
466467# '
467468# ' @examples
468469# ' data.frame(x = c(10:1, NA)) |> sd_summarise(x = sum(x, na.rm = TRUE))
469470# '
470- sd_summarise <- function (.data , ... ) {
471+ sd_summarise <- function (.data , ... , .env = parent.frame() ) {
471472 .data <- as_sedonadb_dataframe(.data )
472473
473474 expr_quos <- rlang :: enquos(... )
474- env <- parent.frame()
475475
476- expr_ctx <- sd_expr_ctx(infer_nanoarrow_schema(.data ), env , ctx = .data $ ctx )
476+ expr_ctx <- sd_expr_ctx(infer_nanoarrow_schema(.data ), . env , ctx = .data $ ctx )
477477 r_exprs <- expr_quos | > rlang :: quos_auto_name() | > lapply(rlang :: quo_get_expr )
478- sd_exprs <- lapply(r_exprs , sd_eval_expr , expr_ctx = expr_ctx , env = env )
478+ sd_exprs <- lapply(r_exprs , sd_eval_expr , expr_ctx = expr_ctx )
479479
480480 # Ensure inputs are given aliases to account for the expected column name
481481 exprs_names <- names(r_exprs )
@@ -492,8 +492,88 @@ sd_summarise <- function(.data, ...) {
492492
#' @rdname sd_summarise
#' @export
sd_summarize <- function(.data, ..., .env = parent.frame()) {
  # Alternate spelling of sd_summarise(): every argument is forwarded
  # unchanged. `.env` is passed explicitly so that the default resolves to the
  # caller of sd_summarize() rather than to this wrapper's own frame.
  sd_summarise(.data, ..., .env = .env)
}
498+
#' Join two SedonaDB DataFrames
#'
#' Perform a join operation between two dataframes. Use [sd_join_by()] to
#' specify join conditions using `x$column` and `y$column` syntax to
#' reference columns from the left and right tables respectively.
#'
#' @param x The left dataframe
#' @param y The right dataframe (will use the same context as x)
#' @param by Join specification. One of:
#'   - A `sedonadb_join_by` object from [sd_join_by()]
#'   - A character vector of column names to join on in both tables
#'   - A named character vector mapping left-table column names to
#'     right-table column names, e.g. `c(x_val = "y_val")`
#'   - `NULL` for a natural join on columns with matching names
#' @param join_type The type of join to perform. One of "inner", "left",
#'   "right", "full", "leftsemi", "rightsemi", "leftanti", "rightanti",
#'   "leftmark", or "rightmark".
#' @param select Post-join column selection. One of
#'   - `NULL` for no modification, which may result in duplicate (unqualified)
#'     column names. Such columns may still be referred to with a qualifier
#'     in advanced usage using [sd_expr_column()].
#'   - [sd_join_select_default()] for dplyr-like behaviour (equi-join keys
#'     removed, intersecting names suffixed)
#'   - [sd_join_select()] for a custom selection
#'
#' @returns An object of class sedonadb_dataframe
#' @export
#'
#' @examples
#' df1 <- data.frame(x = letters[1:10], y = 1:10)
#' df2 <- data.frame(y = 10:1, z = LETTERS[1:10])
#' df1 |> sd_join(df2)
#'
sd_join <- function(
  x,
  y,
  by = NULL,
  join_type = "inner",
  select = sd_join_select_default()
) {
  x <- as_sedonadb_dataframe(x)
  # `y` is converted with the context of `x` so both sides share one context
  y <- as_sedonadb_dataframe(y, ctx = x$ctx)

  x_schema <- infer_nanoarrow_schema(x)
  y_schema <- infer_nanoarrow_schema(y)
  join_expr_ctx <- sd_join_expr_ctx(x_schema, y_schema, ctx = x$ctx)
  join_conditions <- sd_build_join_conditions(join_expr_ctx, by, ctx = x$ctx)

  # The two sides are aliased "x" and "y" in the joined result.
  # NOTE(review): presumably these are the qualifiers that the `x$column` /
  # `y$column` syntax of sd_join_by() resolves to -- confirm.
  df <- x$df$join(
    y$df,
    join_conditions,
    join_type,
    left_alias = "x",
    right_alias = "y"
  )
  out <- new_sedonadb_dataframe(x$ctx, df)

  # Apply post-join column selection if needed
  if (is.null(select)) {
    projection <- NULL
  } else if (inherits(select, "sedonadb_join_select_default")) {
    # Default select: remove duplicate equijoin keys, apply suffixes
    projection <- sd_build_default_select(
      join_expr_ctx,
      join_conditions,
      select$suffix,
      join_type
    )
  } else if (inherits(select, "sedonadb_join_select")) {
    # Custom select: evaluate user expressions
    projection <- sd_eval_join_select_exprs(select, join_expr_ctx)
  } else {
    stop(
      "`select` must be NULL, sd_join_select_default(), or sd_join_select()",
      call. = FALSE
    )
  }

  # NULL return from these functions means that no extra projecting is needed
  if (is.null(projection)) {
    out
  } else {
    # Splice the projection expressions in as individual arguments
    sd_transmute(out, !!!projection)
  }
}
498578
499579# ' Write DataFrame to (Geo)Parquet files
0 commit comments