Skip to content
2 changes: 2 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@

* Fix `.keep` in `mutate()` to avoid accidentally dropping columns
(@Yousa-Mirage, #353).
* Better consistency with `dplyr` when using `NULL` in `mutate()` to drop
columns (@Yousa-Mirage, #355).
* Fix `relocate()` to handle `<tidy-select>` helpers consistently with `dplyr`
(@Yousa-Mirage, #357).

Expand Down
49 changes: 47 additions & 2 deletions R/mutate.R
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,7 @@ mutate.polars_data_frame <- function(
used <- c()
orig_names <- names(.data)
current_names <- orig_names
grp_names <- grps
mutated_vars <- c()

for (i in seq_along(polars_exprs)) {
Expand All @@ -139,6 +140,35 @@ mutate.polars_data_frame <- function(
sub <- compact(sub)
mutated_vars <- c(mutated_vars, names(sub))

if (is_grouped) {
modified_grps <- intersect(c(names(sub), to_drop), grps)
modified_grps <- match(modified_grps, grps)
modified_grps <- modified_grps[
grp_names[modified_grps] == grps[modified_grps]
]
if (length(modified_grps) > 0) {
new_grp_names <- paste0(
"__tidypolars_mutate_group_",
modified_grps,
"__"
)
while (any(new_grp_names %in% c(current_names, grp_names))) {
new_grp_names <- paste0("_", new_grp_names)
}
grp_exprs <- unname(Map(
\(old, new) pl$col(old)$alias(new),
grps[modified_grps],
new_grp_names
))
.data <- .data$with_columns(!!!grp_exprs)
current_names <- c(
current_names,
setdiff(new_grp_names, current_names)
)
grp_names[modified_grps] <- new_grp_names
}
}

used <- c(
used,
lapply(sub, \(x) x$meta$root_names()) |>
Expand All @@ -154,9 +184,9 @@ mutate.polars_data_frame <- function(
if (!is.list(order_by)) {
order_by <- list(order_by)
}
x$over(!!!grps, order_by = order_by)
x$over(!!!grp_names, order_by = order_by)
} else {
x$over(!!!grps)
x$over(!!!grp_names)
}
})
}
Expand All @@ -171,6 +201,21 @@ mutate.polars_data_frame <- function(
}
}

extra_grps <- setdiff(grp_names, grps)
if (length(extra_grps) > 0) {
.data <- .data$drop(extra_grps)
current_names <- setdiff(current_names, extra_grps)
}

ordered_names <- c(
intersect(orig_names, current_names),
setdiff(current_names, orig_names)
)
if (!identical(current_names, ordered_names)) {
.data <- .data$select(!!!ordered_names)
current_names <- ordered_names
}

new_vars <- intersect(
setdiff(unique(mutated_vars), orig_names),
current_names
Expand Down
68 changes: 68 additions & 0 deletions tests/testthat/test-mutate-lazy.R
Original file line number Diff line number Diff line change
Expand Up @@ -155,6 +155,17 @@ test_that("dropping columns works", {
mutate(test_pl, Sepal.Length = 1, Species = NULL),
names(test_df)[1:4]
)

expect_equal_lazy(
mutate(test_pl, missing = NULL),
mutate(test_df, missing = NULL)
)

# Ensure correct column order, https://github.com/etiennebacher/tidypolars/issues/355
expect_equal_lazy(
mutate(test_pl, Sepal.Length = NULL, Sepal.Length = Sepal.Width + 1),
mutate(test_df, Sepal.Length = NULL, Sepal.Length = Sepal.Width + 1)
)
})

test_that("operations on grouped data work", {
Expand Down Expand Up @@ -197,6 +208,28 @@ test_that("operations on grouped data work", {
attr("maintain_grp_order")
)

expect_equal_lazy(
test_pl |>
mutate(Species = NULL, Species = Sepal.Width + 1, .by = Species),
test_df |>
mutate(Species = NULL, Species = Sepal.Width + 1, .by = Species)
)

expect_equal_lazy(
test_pl |>
mutate(
Species = Sepal.Width + 1,
foo = mean(Sepal.Length),
.by = Species,
),
test_df |>
mutate(
Species = Sepal.Width + 1,
foo = mean(Sepal.Length),
.by = Species,
)
)

test_df <- as_tibble(mtcars)
test_pl <- as_polars_lf(test_df)

Expand All @@ -212,6 +245,30 @@ test_that("operations on grouped data work", {
tolerance = 1e-5
)

expect_equal_lazy(
test_pl |>
group_by(cyl, am) |>
mutate(
cyl = NULL,
cyl = disp + 1,
hp2 = mean(hp),
am = NULL,
am = gear + 1
) |>
ungroup(),
test_df |>
group_by(cyl, am) |>
mutate(
cyl = NULL,
cyl = disp + 1,
hp2 = mean(hp),
am = NULL,
am = gear + 1
) |>
ungroup(),
tolerance = 1e-5
)

test_df <- as_tibble(iris)
test_pl <- as_polars_lf(test_df)

Expand All @@ -220,6 +277,17 @@ test_that("operations on grouped data work", {
names(test_df)[2:5]
)

expect_equal_lazy(
test_pl |>
group_by(Species) |>
mutate(Species = NULL, Species = Sepal.Width + 1) |>
ungroup(),
test_df |>
group_by(Species) |>
mutate(Species = NULL, Species = Sepal.Width + 1) |>
ungroup()
)

test_df <- as_tibble(mtcars)
test_pl <- as_polars_lf(test_df)

Expand Down
68 changes: 68 additions & 0 deletions tests/testthat/test-mutate.R
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,17 @@ test_that("dropping columns works", {
mutate(test_pl, Sepal.Length = 1, Species = NULL),
names(test_df)[1:4]
)

expect_equal(
mutate(test_pl, missing = NULL),
mutate(test_df, missing = NULL)
)

# Ensure correct column order, https://github.com/etiennebacher/tidypolars/issues/355
expect_equal(
Comment thread
Yousa-Mirage marked this conversation as resolved.
mutate(test_pl, Sepal.Length = NULL, Sepal.Length = Sepal.Width + 1),
mutate(test_df, Sepal.Length = NULL, Sepal.Length = Sepal.Width + 1)
)
})

test_that("operations on grouped data work", {
Expand Down Expand Up @@ -193,6 +204,28 @@ test_that("operations on grouped data work", {
attr("maintain_grp_order")
)

expect_equal(
test_pl |>
mutate(Species = NULL, Species = Sepal.Width + 1, .by = Species),
test_df |>
mutate(Species = NULL, Species = Sepal.Width + 1, .by = Species)
)

expect_equal(
test_pl |>
mutate(
Species = Sepal.Width + 1,
foo = mean(Sepal.Length),
.by = Species,
),
test_df |>
mutate(
Species = Sepal.Width + 1,
foo = mean(Sepal.Length),
.by = Species,
)
)
Comment on lines +214 to +227
Copy link
Copy Markdown
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What does this test add compared to the existing tests?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is just a regression prevention test that tests the behavior when the grouping variable is modified. You can delete it if you feel it is not necessary. The test in that comment below was really unnecessary and I've deleted it.


test_df <- as_tibble(mtcars)
test_pl <- as_polars_df(test_df)

Expand All @@ -208,6 +241,30 @@ test_that("operations on grouped data work", {
tolerance = 1e-5
)

expect_equal(
test_pl |>
group_by(cyl, am) |>
mutate(
cyl = NULL,
cyl = disp + 1,
hp2 = mean(hp),
am = NULL,
am = gear + 1
) |>
ungroup(),
test_df |>
group_by(cyl, am) |>
mutate(
cyl = NULL,
cyl = disp + 1,
hp2 = mean(hp),
am = NULL,
am = gear + 1
) |>
ungroup(),
tolerance = 1e-5
)

test_df <- as_tibble(iris)
test_pl <- as_polars_df(test_df)

Expand All @@ -216,6 +273,17 @@ test_that("operations on grouped data work", {
names(test_df)[2:5]
)

expect_equal(
test_pl |>
group_by(Species) |>
mutate(Species = NULL, Species = Sepal.Width + 1) |>
ungroup(),
test_df |>
group_by(Species) |>
mutate(Species = NULL, Species = Sepal.Width + 1) |>
ungroup()
)

test_df <- as_tibble(mtcars)
test_pl <- as_polars_df(test_df)

Expand Down
Loading