From eed067ba0cb391916742495665f949e3df2072e8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kirill=20M=C3=BCller?= Date: Thu, 7 Mar 2024 05:33:37 +0100 Subject: [PATCH] group_modify --- NAMESPACE | 1 + R/group_modify.R | 37 ++++++++++++++++++++++++++++ R/overwrite.R | 1 + R/restore.R | 1 + dplyr-methods/group_modify.txt | 8 ++++++ tests/testthat/test-as_duckplyr_df.R | 16 ++++++++++++ tests/testthat/test-group-map.R | 32 ++++++++++++------------ tools/00-funs.R | 3 ++- 8 files changed, 82 insertions(+), 17 deletions(-) create mode 100644 R/group_modify.R create mode 100644 dplyr-methods/group_modify.txt diff --git a/NAMESPACE b/NAMESPACE index 78d479b1..d691db62 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -18,6 +18,7 @@ S3method(group_by,duckplyr_df) S3method(group_data,duckplyr_df) S3method(group_keys,duckplyr_df) S3method(group_map,duckplyr_df) +S3method(group_modify,duckplyr_df) S3method(group_vars,duckplyr_df) S3method(head,duckplyr_df) S3method(inner_join,duckplyr_df) diff --git a/R/group_modify.R b/R/group_modify.R new file mode 100644 index 00000000..7412e9eb --- /dev/null +++ b/R/group_modify.R @@ -0,0 +1,37 @@ +# Generated by 02-duckplyr_df-methods.R +#' @export +group_modify.duckplyr_df <- function(.data, .f, ..., .keep = FALSE, keep = deprecated()) { + # Our implementation + rel_try( + # Always fall back to dplyr + "No relational implementation for group_modify()" = TRUE, + { + return(out) + } + ) + + # dplyr forward + group_modify <- dplyr$group_modify.data.frame + out <- group_modify(.data, .f, ..., .keep = .keep, keep = keep) + return(out) + + # dplyr implementation + if (!missing(keep)) { + lifecycle::deprecate_warn("1.0.0", "group_modify(keep = )", "group_modify(.keep = )", always = TRUE) + .keep <- keep + } + .f <- as_group_map_function(.f) + .f(.data, group_keys(.data), ...) +} + +duckplyr_group_modify <- function(.data, ...) { + try_fetch( + .data <- as_duckplyr_df(.data), + error = function(e) { + testthat::skip(conditionMessage(e)) + } + ) + out <- group_modify(.data, ...) + class(out) <- setdiff(class(out), "duckplyr_df") + out +} diff --git a/R/overwrite.R b/R/overwrite.R index e0836c4c..6f0864bc 100644 --- a/R/overwrite.R +++ b/R/overwrite.R @@ -18,6 +18,7 @@ methods_overwrite <- function() { vctrs::s3_register("dplyr::group_data", "data.frame", group_data.duckplyr_df) vctrs::s3_register("dplyr::group_keys", "data.frame", group_keys.duckplyr_df) vctrs::s3_register("dplyr::group_map", "data.frame", group_map.duckplyr_df) + vctrs::s3_register("dplyr::group_modify", "data.frame", group_modify.duckplyr_df) vctrs::s3_register("dplyr::group_vars", "data.frame", group_vars.duckplyr_df) vctrs::s3_register("dplyr::inner_join", "data.frame", inner_join.duckplyr_df) vctrs::s3_register("dplyr::intersect", "data.frame", intersect.duckplyr_df) diff --git a/R/restore.R b/R/restore.R index aa095f92..de62daed 100644 --- a/R/restore.R +++ b/R/restore.R @@ -18,6 +18,7 @@ methods_restore <- function() { vctrs::s3_register("dplyr::group_data", "data.frame", dplyr$group_data.data.frame) vctrs::s3_register("dplyr::group_keys", "data.frame", dplyr$group_keys.data.frame) vctrs::s3_register("dplyr::group_map", "data.frame", dplyr$group_map.data.frame) + vctrs::s3_register("dplyr::group_modify", "data.frame", dplyr$group_modify.data.frame) vctrs::s3_register("dplyr::group_vars", "data.frame", dplyr$group_vars.data.frame) vctrs::s3_register("dplyr::inner_join", "data.frame", dplyr$inner_join.data.frame) vctrs::s3_register("dplyr::intersect", "data.frame", dplyr$intersect.data.frame) diff --git a/dplyr-methods/group_modify.txt b/dplyr-methods/group_modify.txt new file mode 100644 index 00000000..50609825 --- /dev/null +++ b/dplyr-methods/group_modify.txt @@ -0,0 +1,8 @@ +group_modify.data.frame <- function(.data, .f, ..., .keep = FALSE, keep = deprecated()) { + if (!missing(keep)) { + lifecycle::deprecate_warn("1.0.0", "group_modify(keep = )", "group_modify(.keep = )", always = TRUE) + .keep <- keep + } + .f <- as_group_map_function(.f) + .f(.data, group_keys(.data), ...) +} diff --git a/tests/testthat/test-as_duckplyr_df.R b/tests/testthat/test-as_duckplyr_df.R index dbd0eae6..35a2f9fb 100644 --- a/tests/testthat/test-as_duckplyr_df.R +++ b/tests/testthat/test-as_duckplyr_df.R @@ -802,6 +802,22 @@ test_that("as_duckplyr_df() and group_map(~ .x)", { expect_equal(pre, post) }) +test_that("as_duckplyr_df() and group_modify(~ .x)", { + withr::local_envvar(DUCKPLYR_FORCE = "FALSE") + + skip("Grouped") + + # Data + test_df <- data.frame(a = 1:6 + 0, b = 2, g = rep(1:3, 1:3)) + + # Run + pre <- test_df %>% as_duckplyr_df() %>% group_modify(~ .x) + post <- test_df %>% group_modify(~ .x) %>% as_duckplyr_df() + + # Compare + expect_equal(pre, post) +}) + test_that("as_duckplyr_df() and group_vars()", { withr::local_envvar(DUCKPLYR_FALLBACK_FORCE = "TRUE") diff --git a/tests/testthat/test-group-map.R b/tests/testthat/test-group-map.R index 95f40f6f..bf0d9437 100644 --- a/tests/testthat/test-group-map.R +++ b/tests/testthat/test-group-map.R @@ -30,9 +30,9 @@ test_that("duckplyr_group_map() works on ungrouped data frames (#4067)", { ) }) -test_that("group_modify() makes a grouped_df", { +test_that("duckplyr_group_modify() makes a grouped_df", { res <- duckplyr_group_by(mtcars, cyl) %>% - group_modify(~ head(.x, 2L)) + duckplyr_group_modify(~ head(.x, 2L)) expect_equal(nrow(res), 6L) expect_equal(group_rows(res), list_of(1:2, 3:4, 5:6)) @@ -40,30 +40,30 @@ test_that("group_modify() makes a grouped_df", { res <- iris %>% duckplyr_group_by(Species) %>% duckplyr_filter(Species == "setosa") %>% - group_modify(~ tally(.x)) + duckplyr_group_modify(~ tally(.x)) expect_equal(nrow(res), 1L) expect_equal(group_rows(res), list_of(1L)) res <- iris %>% duckplyr_group_by(Species, .drop = FALSE) %>% duckplyr_filter(Species == "setosa") %>% - group_modify(~ tally(.x)) + duckplyr_group_modify(~ tally(.x)) expect_equal(nrow(res), 3L) expect_equal(as.list(group_rows(res)), list(1L, 2L, 3L)) }) -test_that("group_modify() and duckplyr_group_map() want functions with at least 2 arguments, or ... (#3996)", { +test_that("duckplyr_group_modify() and duckplyr_group_map() want functions with at least 2 arguments, or ... (#3996)", { head1 <- function(d, ...) head(d, 1) g <- iris %>% duckplyr_group_by(Species) - expect_equal(nrow(group_modify(g, head1)), 3L) + expect_equal(nrow(duckplyr_group_modify(g, head1)), 3L) expect_equal(length(duckplyr_group_map(g, head1)), 3L) }) -test_that("group_modify() works on ungrouped data frames (#4067)", { +test_that("duckplyr_group_modify() works on ungrouped data frames (#4067)", { skip("TODO duckdb") expect_identical( - group_modify(mtcars, ~ head(.x, 2L)), + duckplyr_group_modify(mtcars, ~ head(.x, 2L)), head(mtcars, 2L) ) }) @@ -80,15 +80,15 @@ test_that("duckplyr_group_map() uses ptype on empty splits (#4421)", { expect_s3_class(ptype, "data.frame") }) -test_that("group_modify() uses ptype on empty splits (#4421)", { +test_that("duckplyr_group_modify() uses ptype on empty splits (#4421)", { res <- mtcars %>% duckplyr_group_by(cyl) %>% duckplyr_filter(hp > 1000) %>% - group_modify(~.x) + duckplyr_group_modify(~.x) expect_equal(res, duckplyr_group_by(mtcars[integer(0L), names(res)], cyl)) }) -test_that("group_modify() works with additional arguments (#4509)", { +test_that("duckplyr_group_modify() works with additional arguments (#4509)", { myfun <- function(.x, .y, foo) { .x[[foo]] <- 1 .x @@ -103,7 +103,7 @@ test_that("group_modify() works with additional arguments (#4509)", { targetdata$bar <- 1 expect_equal( - group_modify(.data = srcdata, .f = myfun, foo = "bar"), + duckplyr_group_modify(.data = srcdata, .f = myfun, foo = "bar"), targetdata ) }) @@ -119,10 +119,10 @@ test_that("duckplyr_group_map() give meaningful errors", { head1 <- function(d) head(d, 1) expect_snapshot({ - # group_modify() - (expect_error(mtcars %>% duckplyr_group_by(cyl) %>% group_modify(~ data.frame(cyl = 19)))) - (expect_error(mtcars %>% duckplyr_group_by(cyl) %>% group_modify(~ 10))) - (expect_error(iris %>% duckplyr_group_by(Species) %>% group_modify(head1))) + # duckplyr_group_modify() + (expect_error(mtcars %>% duckplyr_group_by(cyl) %>% duckplyr_group_modify(~ data.frame(cyl = 19)))) + (expect_error(mtcars %>% duckplyr_group_by(cyl) %>% duckplyr_group_modify(~ 10))) + (expect_error(iris %>% duckplyr_group_by(Species) %>% duckplyr_group_modify(head1))) # duckplyr_group_map() (expect_error(iris %>% duckplyr_group_by(Species) %>% duckplyr_group_map(head1))) diff --git a/tools/00-funs.R b/tools/00-funs.R index c0c031f2..96428afe 100644 --- a/tools/00-funs.R +++ b/tools/00-funs.R @@ -16,7 +16,7 @@ df_methods <- filter(!grepl("_$|^as[.]tbl$", name)) %>% # special dplyr methods, won't implement filter(!(name %in% c( - "group_indices", "group_modify", "group_nest", "group_size", "group_split", "group_trim", "groups", "n_groups", + "group_indices", "group_nest", "group_size", "group_split", "group_trim", "groups", "n_groups", "same_src", # data frames can be copied into duck-frames with zero cost NULL ))) %>% @@ -28,6 +28,7 @@ df_methods <- "group_data", "group_keys", "group_map", + "group_modify", "rowwise", NULL ))) %>%