Skip to content

Commit

Permalink
group_map
Browse files Browse the repository at this point in the history
  • Loading branch information
krlmlr committed Mar 7, 2024
1 parent f418434 commit e81f3ed
Show file tree
Hide file tree
Showing 11 changed files with 115 additions and 47 deletions.
2 changes: 2 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ S3method(full_join,duckplyr_df)
S3method(group_by,duckplyr_df)
S3method(group_data,duckplyr_df)
S3method(group_keys,duckplyr_df)
S3method(group_map,duckplyr_df)
S3method(group_vars,duckplyr_df)
S3method(head,duckplyr_df)
S3method(inner_join,duckplyr_df)
Expand Down Expand Up @@ -253,6 +254,7 @@ importFrom(dplyr,group_map)
importFrom(dplyr,group_modify)
importFrom(dplyr,group_rows)
importFrom(dplyr,group_size)
importFrom(dplyr,group_split)
importFrom(dplyr,group_vars)
importFrom(dplyr,group_walk)
importFrom(dplyr,grouped_df)
Expand Down
1 change: 1 addition & 0 deletions R/dplyr.R
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
dplyr <- asNamespace("dplyr")

as_across_fn_call <- dplyr$as_across_fn_call
as_group_map_function <- dplyr$as_group_map_function
as_fun_list <- dplyr$as_fun_list
as_join_by <- dplyr$as_join_by
check_filter <- dplyr$check_filter
Expand Down
1 change: 1 addition & 0 deletions R/duckplyr-package.R
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
#' @importFrom collections dict
#' @importFrom collections queue
#' @importFrom dplyr group_by_prepare
#' @importFrom dplyr group_split
#' @importFrom dplyr grouped_df
#' @importFrom dplyr if_else
#' @importFrom glue glue
Expand Down
52 changes: 52 additions & 0 deletions R/group_map.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
# Generated by 02-duckplyr_df-methods.R
#' @export
group_map.duckplyr_df <- function(.data, .f, ..., .keep = FALSE, keep = deprecated()) {
# Our implementation
rel_try(
# Always fall back to dplyr
"No relational implementation for group_map()" = TRUE,
{
return(out)
}
)

# dplyr forward
group_map <- dplyr$group_map.data.frame
out <- group_map(.data, .f, ..., .keep = .keep, keep = keep)
return(out)

# dplyr implementation
if (!missing(keep)) {
lifecycle::deprecate_warn("1.0.0", "group_map(keep = )", "group_map(.keep = )", always = TRUE)
.keep <- keep
}
.f <- as_group_map_function(.f)

# call the function on each group
chunks <- if (is_grouped_df(.data)) {
group_split(.data, .keep = isTRUE(.keep))
} else {
group_split(.data)
}
keys <- group_keys(.data)
group_keys <- map(seq_len(nrow(keys)), function(i) keys[i, , drop = FALSE])

if (length(chunks)) {
map2(chunks, group_keys, .f, ...)
} else {
# calling .f with .x and .y set to prototypes
structure(list(), ptype = .f(attr(chunks, "ptype"), keys[integer(0L), ], ...))
}
}

duckplyr_group_map <- function(.data, ...) {
try_fetch(
.data <- as_duckplyr_df(.data),
error = function(e) {
testthat::skip(conditionMessage(e))
}
)
out <- group_map(.data, ...)
class(out) <- setdiff(class(out), "duckplyr_df")
out
}
1 change: 1 addition & 0 deletions R/overwrite.R
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ methods_overwrite <- function() {
vctrs::s3_register("dplyr::group_by", "data.frame", group_by.duckplyr_df)
vctrs::s3_register("dplyr::group_data", "data.frame", group_data.duckplyr_df)
vctrs::s3_register("dplyr::group_keys", "data.frame", group_keys.duckplyr_df)
vctrs::s3_register("dplyr::group_map", "data.frame", group_map.duckplyr_df)
vctrs::s3_register("dplyr::group_vars", "data.frame", group_vars.duckplyr_df)
vctrs::s3_register("dplyr::inner_join", "data.frame", inner_join.duckplyr_df)
vctrs::s3_register("dplyr::intersect", "data.frame", intersect.duckplyr_df)
Expand Down
1 change: 1 addition & 0 deletions R/restore.R
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ methods_restore <- function() {
vctrs::s3_register("dplyr::group_by", "data.frame", dplyr$group_by.data.frame)
vctrs::s3_register("dplyr::group_data", "data.frame", dplyr$group_data.data.frame)
vctrs::s3_register("dplyr::group_keys", "data.frame", dplyr$group_keys.data.frame)
vctrs::s3_register("dplyr::group_map", "data.frame", dplyr$group_map.data.frame)
vctrs::s3_register("dplyr::group_vars", "data.frame", dplyr$group_vars.data.frame)
vctrs::s3_register("dplyr::inner_join", "data.frame", dplyr$inner_join.data.frame)
vctrs::s3_register("dplyr::intersect", "data.frame", dplyr$intersect.data.frame)
Expand Down
23 changes: 23 additions & 0 deletions dplyr-methods/group_map.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
group_map.data.frame <- function(.data, .f, ..., .keep = FALSE, keep = deprecated()) {
if (!missing(keep)) {
lifecycle::deprecate_warn("1.0.0", "group_map(keep = )", "group_map(.keep = )", always = TRUE)
.keep <- keep
}
.f <- as_group_map_function(.f)

# call the function on each group
chunks <- if (is_grouped_df(.data)) {
group_split(.data, .keep = isTRUE(.keep))
} else {
group_split(.data)
}
keys <- group_keys(.data)
group_keys <- map(seq_len(nrow(keys)), function(i) keys[i, , drop = FALSE])

if (length(chunks)) {
map2(chunks, group_keys, .f, ...)
} else {
# calling .f with .x and .y set to prototypes
structure(list(), ptype = .f(attr(chunks, "ptype"), keys[integer(0L), ], ...))
}
}
30 changes: 0 additions & 30 deletions tests/testthat/_snaps/group-map.md

This file was deleted.

16 changes: 16 additions & 0 deletions tests/testthat/test-as_duckplyr_df.R
Original file line number Diff line number Diff line change
Expand Up @@ -786,6 +786,22 @@ test_that("as_duckplyr_df() and group_keys()", {
expect_equal(pre, post)
})

test_that("as_duckplyr_df() and group_map(~ .x)", {
withr::local_envvar(DUCKPLYR_FORCE = "FALSE")

skip("WAT")

# Data
test_df <- data.frame(a = 1:6 + 0, b = 2, g = rep(1:3, 1:3))

# Run
pre <- test_df %>% as_duckplyr_df() %>% group_map(~ .x)
post <- test_df %>% group_map(~ .x) %>% as_duckplyr_df()

# Compare
expect_equal(pre, post)
})

test_that("as_duckplyr_df() and group_vars()", {
withr::local_envvar(DUCKPLYR_FALLBACK_FORCE = "TRUE")

Expand Down
32 changes: 16 additions & 16 deletions tests/testthat/test-group-map.R
Original file line number Diff line number Diff line change
@@ -1,31 +1,31 @@
test_that("group_map() respects empty groups", {
test_that("duckplyr_group_map() respects empty groups", {
res <- duckplyr_group_by(mtcars, cyl) %>%
group_map(~ head(.x, 2L))
duckplyr_group_map(~ head(.x, 2L))
expect_equal(length(res), 3L)

res <- iris %>%
duckplyr_group_by(Species) %>%
duckplyr_filter(Species == "setosa") %>%
group_map(~ tally(.x))
duckplyr_group_map(~ tally(.x))
expect_equal(length(res), 1L)

res <- iris %>%
duckplyr_group_by(Species, .drop = FALSE) %>%
duckplyr_filter(Species == "setosa") %>%
group_map(~ tally(.x))
duckplyr_group_map(~ tally(.x))
expect_equal(length(res), 3L)
})

test_that("group_map() can return arbitrary objects", {
test_that("duckplyr_group_map() can return arbitrary objects", {
expect_equal(
duckplyr_group_by(mtcars, cyl) %>% group_map(~ 10),
duckplyr_group_by(mtcars, cyl) %>% duckplyr_group_map(~ 10),
rep(list(10), 3)
)
})

test_that("group_map() works on ungrouped data frames (#4067)", {
test_that("duckplyr_group_map() works on ungrouped data frames (#4067)", {
expect_identical(
group_map(mtcars, ~ head(.x, 2L)),
duckplyr_group_map(mtcars, ~ head(.x, 2L)),
list(head(as_tibble(mtcars), 2L))
)
})
Expand All @@ -52,12 +52,12 @@ test_that("group_modify() makes a grouped_df", {
expect_equal(as.list(group_rows(res)), list(1L, 2L, 3L))
})

test_that("group_modify() and group_map() want functions with at least 2 arguments, or ... (#3996)", {
test_that("group_modify() and duckplyr_group_map() want functions with at least 2 arguments, or ... (#3996)", {
head1 <- function(d, ...) head(d, 1)

g <- iris %>% duckplyr_group_by(Species)
expect_equal(nrow(group_modify(g, head1)), 3L)
expect_equal(length(group_map(g, head1)), 3L)
expect_equal(length(duckplyr_group_map(g, head1)), 3L)
})

test_that("group_modify() works on ungrouped data frames (#4067)", {
Expand All @@ -68,11 +68,11 @@ test_that("group_modify() works on ungrouped data frames (#4067)", {
)
})

test_that("group_map() uses ptype on empty splits (#4421)", {
test_that("duckplyr_group_map() uses ptype on empty splits (#4421)", {
res <- mtcars %>%
duckplyr_group_by(cyl) %>%
duckplyr_filter(hp > 1000) %>%
group_map(~.x)
duckplyr_group_map(~.x)
expect_equal(res, list(), ignore_attr = TRUE)
ptype <- attr(res, "ptype")
expect_equal(names(ptype), duckplyr_setdiff(names(mtcars), "cyl"))
Expand Down Expand Up @@ -108,14 +108,14 @@ test_that("group_modify() works with additional arguments (#4509)", {
)
})

test_that("group_map() does not warn about .keep= for rowwise_df", {
test_that("duckplyr_group_map() does not warn about .keep= for rowwise_df", {
expect_warning(
data.frame(x = 1) %>% duckplyr_rowwise() %>% group_walk(~ {}),
NA
)
})

test_that("group_map() give meaningful errors", {
test_that("duckplyr_group_map() give meaningful errors", {
head1 <- function(d) head(d, 1)

expect_snapshot({
Expand All @@ -124,8 +124,8 @@ test_that("group_map() give meaningful errors", {
(expect_error(mtcars %>% duckplyr_group_by(cyl) %>% group_modify(~ 10)))
(expect_error(iris %>% duckplyr_group_by(Species) %>% group_modify(head1)))

# group_map()
(expect_error(iris %>% duckplyr_group_by(Species) %>% group_map(head1)))
# duckplyr_group_map()
(expect_error(iris %>% duckplyr_group_by(Species) %>% duckplyr_group_map(head1)))
})

})
3 changes: 2 additions & 1 deletion tools/00-funs.R
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ df_methods <-
filter(!grepl("_$|^as[.]tbl$", name)) %>%
# special dplyr methods, won't implement
filter(!(name %in% c(
"group_indices", "group_map", "group_modify", "group_nest", "group_size", "group_split", "group_trim", "groups", "n_groups",
"group_indices", "group_modify", "group_nest", "group_size", "group_split", "group_trim", "groups", "n_groups",
"same_src", # data frames can be copied into duck-frames with zero cost
NULL
))) %>%
Expand All @@ -27,6 +27,7 @@ df_methods <-
"group_by",
"group_data",
"group_keys",
"group_map",
"rowwise",
NULL
))) %>%
Expand Down

0 comments on commit e81f3ed

Please sign in to comment.