Skip to content

Commit

Permalink
group_data
Browse files Browse the repository at this point in the history
  • Loading branch information
krlmlr committed Mar 7, 2024
1 parent 34ab6c9 commit 8c920bc
Show file tree
Hide file tree
Showing 11 changed files with 77 additions and 9 deletions.
2 changes: 2 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ S3method(dplyr_row_slice,duckplyr_df)
S3method(explain,duckplyr_df)
S3method(full_join,duckplyr_df)
S3method(group_by,duckplyr_df)
S3method(group_data,duckplyr_df)
S3method(group_vars,duckplyr_df)
S3method(head,duckplyr_df)
S3method(inner_join,duckplyr_df)
Expand Down Expand Up @@ -330,6 +331,7 @@ importFrom(tidyselect,everything)
importFrom(utils,globalVariables)
importFrom(utils,head)
importFrom(vctrs,new_data_frame)
importFrom(vctrs,new_list_of)
importFrom(vctrs,new_rcrd)
importFrom(vctrs,unspecified)
importFrom(vctrs,vec_as_names)
Expand Down
1 change: 1 addition & 0 deletions R/duckplyr-package.R
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
#' @importFrom utils globalVariables
#' @importFrom utils head
#' @importFrom vctrs new_data_frame
#' @importFrom vctrs new_list_of
#' @importFrom vctrs new_rcrd
#' @importFrom vctrs unspecified
#' @importFrom vctrs vec_as_names
Expand Down
37 changes: 37 additions & 0 deletions R/group_data.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
# Generated by 02-duckplyr_df-methods.R
#' @export
group_data.duckplyr_df <- function(.data) {
# group_data() method for duckplyr_df objects.
#
# .data: the object dispatched on; it is handed unchanged to
# dplyr$group_data.data.frame() in the "dplyr forward" section.
# Returns: the value produced by dplyr$group_data.data.frame(.data).

# Our implementation
rel_try(
# Always fall back to dplyr
# NOTE(review): presumably rel_try() treats a named TRUE argument as "skip the
# relational branch, using the name as the reason" -- rel_try() is not defined
# in this file, confirm against its implementation.
"No relational implementation for group_data()" = TRUE,
{
# `out` has not been assigned at this point, so this branch is only valid
# if rel_try() never evaluates it.
return(out)
}
)

# dplyr forward
# Calls the data-frame method fetched from `dplyr` directly; the local name
# `group_data` shadows the generic only inside this function.
group_data <- dplyr$group_data.data.frame
out <- group_data(.data)
return(out)

# dplyr implementation
# Unreachable: everything below follows the unconditional return() above.
# It builds a one-row data frame whose `.rows` list column holds
# seq_len(nrow(.data)); apparently kept as a reference copy of the
# data-frame method -- TODO confirm with the generator script.
size <- nrow(.data)
out <- seq_len(size)
out <- new_list_of(list(out), ptype = integer())
out <- list(.rows = out)
out <- new_data_frame(out, n = 1L)
out
}

# Test helper: converts `.data` with as_duckplyr_df(), calls group_data() on
# the converted object (forwarding `...`), and returns the result with the
# "duckplyr_df" class removed.
#
# If the conversion raises an error, its message is passed to
# testthat::skip() instead of letting the error propagate.
duckplyr_group_data <- function(.data, ...) {
  .data <- try_fetch(
    as_duckplyr_df(.data),
    error = function(cnd) {
      testthat::skip(conditionMessage(cnd))
    }
  )

  grouped <- group_data(.data, ...)
  class(grouped) <- setdiff(class(grouped), "duckplyr_df")
  grouped
}
1 change: 1 addition & 0 deletions R/overwrite.R
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ methods_overwrite <- function() {
vctrs::s3_register("dplyr::filter", "data.frame", filter.duckplyr_df)
vctrs::s3_register("dplyr::full_join", "data.frame", full_join.duckplyr_df)
vctrs::s3_register("dplyr::group_by", "data.frame", group_by.duckplyr_df)
vctrs::s3_register("dplyr::group_data", "data.frame", group_data.duckplyr_df)
vctrs::s3_register("dplyr::group_vars", "data.frame", group_vars.duckplyr_df)
vctrs::s3_register("dplyr::inner_join", "data.frame", inner_join.duckplyr_df)
vctrs::s3_register("dplyr::intersect", "data.frame", intersect.duckplyr_df)
Expand Down
1 change: 1 addition & 0 deletions R/restore.R
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ methods_restore <- function() {
vctrs::s3_register("dplyr::filter", "data.frame", dplyr$filter.data.frame)
vctrs::s3_register("dplyr::full_join", "data.frame", dplyr$full_join.data.frame)
vctrs::s3_register("dplyr::group_by", "data.frame", dplyr$group_by.data.frame)
vctrs::s3_register("dplyr::group_data", "data.frame", dplyr$group_data.data.frame)
vctrs::s3_register("dplyr::group_vars", "data.frame", dplyr$group_vars.data.frame)
vctrs::s3_register("dplyr::inner_join", "data.frame", dplyr$inner_join.data.frame)
vctrs::s3_register("dplyr::intersect", "data.frame", dplyr$intersect.data.frame)
Expand Down
8 changes: 8 additions & 0 deletions dplyr-methods/group_data.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# group_data() for a plain data frame: a one-row data frame whose `.rows`
# list column contains a single integer vector, seq_len(nrow(.data)).
group_data.data.frame <- function(.data) {
  all_rows <- seq_len(nrow(.data))
  rows_col <- new_list_of(list(all_rows), ptype = integer())
  new_data_frame(list(.rows = rows_col), n = 1L)
}
16 changes: 16 additions & 0 deletions tests/testthat/test-as_duckplyr_df.R
Original file line number Diff line number Diff line change
Expand Up @@ -754,6 +754,22 @@ test_that("as_duckplyr_df() and group_by()", {
expect_equal(pre, post)
})

test_that("as_duckplyr_df() and group_data()", {
  withr::local_envvar(DUCKPLYR_FORCE = "FALSE")

  # Skipped unconditionally: nothing below this call runs.
  skip("Special")

  # Input
  input <- data.frame(a = 1:6 + 0, b = 2, g = rep(1:3, 1:3))

  # Convert before group_data() vs. convert after group_data()
  converted_first <- group_data(as_duckplyr_df(input))
  converted_last <- as_duckplyr_df(group_data(input))

  # Both orders are expected to give the same result
  expect_equal(converted_first, converted_last)
})

test_that("as_duckplyr_df() and group_vars()", {
withr::local_envvar(DUCKPLYR_FALLBACK_FORCE = "TRUE")

Expand Down
6 changes: 3 additions & 3 deletions tests/testthat/test-generics.R
Original file line number Diff line number Diff line change
@@ -1,16 +1,16 @@
test_that("row_slice recomputes groups", {
# NOTE(review): this span is a rendered diff hunk without +/- markers. Each
# expectation appears twice: once with group_data() and once with
# duckplyr_group_data(). Presumably the first of each pair is the removed
# line and the second its replacement -- confirm against the real file.

# Six rows grouped by g, two rows per value of g.
gf <- duckplyr_group_by(data.frame(g = c(1, 1, 2, 2, 3, 3)), g)
# Keep rows 1, 3 and 5; the expectation is one single-index entry per group.
out <- duckplyr_dplyr_row_slice(gf, c(1L, 3L, 5L))
expect_equal(group_data(out)$.rows, list_of(1L, 2L, 3L))
expect_equal(duckplyr_group_data(out)$.rows, list_of(1L, 2L, 3L))

# Keep rows 4 and 3; the expectation is a single entry holding indices 1 and 2.
out <- duckplyr_dplyr_row_slice(gf, c(4L, 3L))
expect_equal(group_data(out)$.rows, list_of(c(1L, 2L)))
expect_equal(duckplyr_group_data(out)$.rows, list_of(c(1L, 2L)))
})

test_that("row_slice preserves empty groups if requested", {
# NOTE(review): rendered diff hunk without +/- markers. The expectation
# appears once with group_data() and once with duckplyr_group_data(),
# presumably the removed line and its replacement -- confirm against the
# real file.

# Same six-row input, grouped with .drop = FALSE.
gf <- duckplyr_group_by(data.frame(g = c(1, 1, 2, 2, 3, 3)), g, .drop = FALSE)
# Keep rows 3 and 4; the expectation is three entries, with the first and
# last being empty integer vectors.
out <- duckplyr_dplyr_row_slice(gf, c(3L, 4L))
expect_equal(group_data(out)$.rows, list_of(integer(), c(1L, 2L), integer()))
expect_equal(duckplyr_group_data(out)$.rows, list_of(integer(), c(1L, 2L), integer()))
})


Expand Down
6 changes: 3 additions & 3 deletions tests/testthat/test-mutate.R
Original file line number Diff line number Diff line change
Expand Up @@ -303,11 +303,11 @@ test_that("mutate preserves grouping", {
i <- count_regroups(out <- duckplyr_mutate(gf, x = 1))
expect_equal(i, 1L)
expect_equal(duckplyr_group_vars(out), "x")
expect_equal(nrow(group_data(out)), 1)
expect_equal(nrow(duckplyr_group_data(out)), 1)

i <- count_regroups(out <- duckplyr_mutate(gf, z = 1))
expect_equal(i, 0)
expect_equal(group_data(out), group_data(gf))
expect_equal(duckplyr_group_data(out), duckplyr_group_data(gf))
})

test_that("mutate works on zero-row grouped data frame (#596)", {
Expand All @@ -319,7 +319,7 @@ test_that("mutate works on zero-row grouped data frame (#596)", {

expect_type(group_rows(res), "list")
expect_equal(attr(group_rows(res), "ptype"), integer())
expect_equal(group_data(res)$b, factor(character(0)))
expect_equal(duckplyr_group_data(res)$b, factor(character(0)))
})

test_that("mutate preserves class of zero-row rowwise (#4224, #6303)", {
Expand Down
4 changes: 2 additions & 2 deletions tests/testthat/test-transmute.R
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,11 @@ test_that("transmute preserves grouping", {
i <- count_regroups(out <- duckplyr_transmute(gf, x = 1))
expect_equal(i, 1L)
expect_equal(duckplyr_group_vars(out), "x")
expect_equal(nrow(group_data(out)), 1)
expect_equal(nrow(duckplyr_group_data(out)), 1)

i <- count_regroups(out <- duckplyr_transmute(gf, z = 1))
expect_equal(i, 0)
expect_equal(group_data(out), group_data(gf))
expect_equal(duckplyr_group_data(out), duckplyr_group_data(gf))
})

# Empty transmutes -------------------------------------------------
Expand Down
4 changes: 3 additions & 1 deletion tools/00-funs.R
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ df_methods <-
filter(!grepl("_$|^as[.]tbl$", name)) %>%
# special dplyr methods, won't implement
filter(!(name %in% c(
"group_data", "group_indices", "group_keys", "group_map", "group_modify", "group_nest", "group_size", "group_split", "group_trim", "groups", "n_groups",
"group_indices", "group_keys", "group_map", "group_modify", "group_nest", "group_size", "group_split", "group_trim", "groups", "n_groups",
"same_src", # data frames can be copied into duck-frames with zero cost
NULL
))) %>%
Expand All @@ -25,6 +25,7 @@ df_methods <-
"dplyr_col_modify",
"dplyr_row_slice",
"group_by",
"group_data",
"rowwise",
NULL
))) %>%
Expand Down Expand Up @@ -691,6 +692,7 @@ test_skip_map <- c(
# FIXME: Fail with group_by()
dplyr_reconstruct = "Hack",
group_by = "Grouped",
group_data = "Special",
group_map = "WAT",
group_modify = "Grouped",
group_nest = "Always returns tibble",
Expand Down

0 comments on commit 8c920bc

Please sign in to comment.