Skip to content

Commit

Permalink
group_size
Browse files Browse the repository at this point in the history
  • Loading branch information
krlmlr committed Mar 7, 2024
1 parent eed067b commit 674690c
Show file tree
Hide file tree
Showing 12 changed files with 72 additions and 16 deletions.
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ S3method(group_data,duckplyr_df)
S3method(group_keys,duckplyr_df)
S3method(group_map,duckplyr_df)
S3method(group_modify,duckplyr_df)
S3method(group_size,duckplyr_df)
S3method(group_vars,duckplyr_df)
S3method(head,duckplyr_df)
S3method(inner_join,duckplyr_df)
Expand Down
32 changes: 32 additions & 0 deletions R/group_size.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
# Generated by 02-duckplyr_df-methods.R
#' @export
group_size.duckplyr_df <- function(x) {
# group_size() method for duckplyr_df objects.
#
# x: the object passed on to the dplyr data frame method below.
# Returns: the value of dplyr$group_size.data.frame(x), via the
# "dplyr forward" section; no other return path is usable as written.

# Our implementation
# NOTE(review): rel_try() is defined elsewhere. Presumably the named
# argument is a "fall back when TRUE" condition whose name is the reason
# reported, and the braced block is the relational code path -- confirm
# against rel_try().
rel_try(
# Always fall back to dplyr
"No relational implementation for group_size()" = TRUE,
{
# Placeholder: `out` has not been assigned at this point, so this block
# only makes sense if it is never evaluated.
return(out)
}
)

# dplyr forward
# The local name `group_size` holds dplyr's data frame method, so the call
# below goes straight to that method rather than through S3 dispatch.
group_size <- dplyr$group_size.data.frame
out <- group_size(x)
return(out)

# dplyr implementation
# Unreachable: the return() above always exits first. Same expression as
# the body of group_size.data.frame in dplyr-methods/group_size.txt.
lengths(group_rows(x))
}

# Test helper around group_size().
#
# Coerces `x` with as_duckplyr_df(); if that coercion signals an error, the
# error message is handed to testthat::skip() instead of being re-raised.
# Otherwise group_size() is called on the coerced object (forwarding `...`),
# and "duckplyr_df" is dropped from the class of the result before it is
# returned.
duckplyr_group_size <- function(x, ...) {
  skip_with_message <- function(cnd) {
    testthat::skip(conditionMessage(cnd))
  }
  x <- try_fetch(as_duckplyr_df(x), error = skip_with_message)

  sizes <- group_size(x, ...)
  class(sizes) <- setdiff(class(sizes), "duckplyr_df")
  sizes
}
1 change: 1 addition & 0 deletions R/overwrite.R
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ methods_overwrite <- function() {
vctrs::s3_register("dplyr::group_keys", "data.frame", group_keys.duckplyr_df)
vctrs::s3_register("dplyr::group_map", "data.frame", group_map.duckplyr_df)
vctrs::s3_register("dplyr::group_modify", "data.frame", group_modify.duckplyr_df)
vctrs::s3_register("dplyr::group_size", "data.frame", group_size.duckplyr_df)
vctrs::s3_register("dplyr::group_vars", "data.frame", group_vars.duckplyr_df)
vctrs::s3_register("dplyr::inner_join", "data.frame", inner_join.duckplyr_df)
vctrs::s3_register("dplyr::intersect", "data.frame", intersect.duckplyr_df)
Expand Down
1 change: 1 addition & 0 deletions R/restore.R
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ methods_restore <- function() {
vctrs::s3_register("dplyr::group_keys", "data.frame", dplyr$group_keys.data.frame)
vctrs::s3_register("dplyr::group_map", "data.frame", dplyr$group_map.data.frame)
vctrs::s3_register("dplyr::group_modify", "data.frame", dplyr$group_modify.data.frame)
vctrs::s3_register("dplyr::group_size", "data.frame", dplyr$group_size.data.frame)
vctrs::s3_register("dplyr::group_vars", "data.frame", dplyr$group_vars.data.frame)
vctrs::s3_register("dplyr::inner_join", "data.frame", dplyr$inner_join.data.frame)
vctrs::s3_register("dplyr::intersect", "data.frame", dplyr$intersect.data.frame)
Expand Down
3 changes: 3 additions & 0 deletions dplyr-methods/group_size.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Data frame method for group_size(): returns lengths() of group_rows(x),
# i.e. one length per element of whatever group_rows(x) yields.
# NOTE(review): presumably a verbatim copy of the dplyr method kept for
# reference -- confirm before editing the code itself.
group_size.data.frame <- function(x) {
lengths(group_rows(x))
}
4 changes: 2 additions & 2 deletions tests/testthat/test-arrange.R
Original file line number Diff line number Diff line change
Expand Up @@ -334,8 +334,8 @@ test_that("arrange keeps zero length groups",{
)
df <- duckplyr_group_by(df, e, f, g, .drop = FALSE)

expect_equal( group_size(duckplyr_arrange(df)), c(2, 2, 0) )
expect_equal( group_size(duckplyr_arrange(df, x)), c(2, 2, 0) )
expect_equal( duckplyr_group_size(duckplyr_arrange(df)), c(2, 2, 0) )
expect_equal( duckplyr_group_size(duckplyr_arrange(df, x)), c(2, 2, 0) )
})

# legacy --------------------------------------------------------------
Expand Down
16 changes: 16 additions & 0 deletions tests/testthat/test-as_duckplyr_df.R
Original file line number Diff line number Diff line change
Expand Up @@ -818,6 +818,22 @@ test_that("as_duckplyr_df() and group_modify(~ .x)", {
expect_equal(pre, post)
})

test_that("as_duckplyr_df() and group_size()", {
  withr::local_envvar(DUCKPLYR_FORCE = "FALSE")

  # Unconditional skip: nothing below this call is executed.
  skip("Special")

  # Input: six rows; `g` holds 1, 2, 3 repeated 1, 2 and 3 times
  test_df <- data.frame(a = 1:6 + 0, b = 2, g = rep(1:3, 1:3))

  # Convert first, then take group sizes
  pre <- group_size(as_duckplyr_df(test_df))
  # Take group sizes first, then convert
  post <- as_duckplyr_df(group_size(test_df))

  # Both orders are expected to give the same result
  expect_equal(pre, post)
})

test_that("as_duckplyr_df() and group_vars()", {
withr::local_envvar(DUCKPLYR_FALLBACK_FORCE = "TRUE")

Expand Down
2 changes: 1 addition & 1 deletion tests/testthat/test-filter.R
Original file line number Diff line number Diff line change
Expand Up @@ -618,7 +618,7 @@ test_that("filter keeps zero length groups", {
)
df <- duckplyr_group_by(df, e, f, g, .drop = FALSE)

expect_equal(group_size(duckplyr_filter(df, f == 1)), c(2, 0, 0) )
expect_equal(duckplyr_group_size(duckplyr_filter(df, f == 1)), c(2, 0, 0) )
})

test_that("filtering retains labels for zero length groups", {
Expand Down
20 changes: 10 additions & 10 deletions tests/testthat/test-join.R
Original file line number Diff line number Diff line change
Expand Up @@ -647,23 +647,23 @@ test_that("joins respect zero length groups", {
df2 <- tibble(f = factor( c(2,2,3,3), levels = 1:3), y = c(1,2,3,4)) %>%
duckplyr_group_by(f)

expect_equal(group_size(duckplyr_left_join( df1, df2, by = "f", relationship = "many-to-many")), c(2,4))
expect_equal(group_size(duckplyr_right_join( df1, df2, by = "f", relationship = "many-to-many")), c(4,2))
expect_equal(group_size(duckplyr_full_join( df1, df2, by = "f", relationship = "many-to-many")), c(2,4,2))
expect_equal(group_size(duckplyr_anti_join( df1, df2, by = "f")), c(2))
expect_equal(group_size(duckplyr_inner_join( df1, df2, by = "f", relationship = "many-to-many")), c(4))
expect_equal(duckplyr_group_size(duckplyr_left_join( df1, df2, by = "f", relationship = "many-to-many")), c(2,4))
expect_equal(duckplyr_group_size(duckplyr_right_join( df1, df2, by = "f", relationship = "many-to-many")), c(4,2))
expect_equal(duckplyr_group_size(duckplyr_full_join( df1, df2, by = "f", relationship = "many-to-many")), c(2,4,2))
expect_equal(duckplyr_group_size(duckplyr_anti_join( df1, df2, by = "f")), c(2))
expect_equal(duckplyr_group_size(duckplyr_inner_join( df1, df2, by = "f", relationship = "many-to-many")), c(4))


df1 <- tibble(f = factor( c(1,1,2,2), levels = 1:3), x = c(1,2,1,4)) %>%
duckplyr_group_by(f, .drop = FALSE)
df2 <- tibble(f = factor( c(2,2,3,3), levels = 1:3), y = c(1,2,3,4)) %>%
duckplyr_group_by(f, .drop = FALSE)

expect_equal(group_size(duckplyr_left_join( df1, df2, by = "f", relationship = "many-to-many")), c(2,4,0))
expect_equal(group_size(duckplyr_right_join( df1, df2, by = "f", relationship = "many-to-many")), c(0,4,2))
expect_equal(group_size(duckplyr_full_join( df1, df2, by = "f", relationship = "many-to-many")), c(2,4,2))
expect_equal(group_size(duckplyr_anti_join( df1, df2, by = "f")), c(2,0,0))
expect_equal(group_size(duckplyr_inner_join( df1, df2, by = "f", relationship = "many-to-many")), c(0,4,0))
expect_equal(duckplyr_group_size(duckplyr_left_join( df1, df2, by = "f", relationship = "many-to-many")), c(2,4,0))
expect_equal(duckplyr_group_size(duckplyr_right_join( df1, df2, by = "f", relationship = "many-to-many")), c(0,4,2))
expect_equal(duckplyr_group_size(duckplyr_full_join( df1, df2, by = "f", relationship = "many-to-many")), c(2,4,2))
expect_equal(duckplyr_group_size(duckplyr_anti_join( df1, df2, by = "f")), c(2,0,0))
expect_equal(duckplyr_group_size(duckplyr_inner_join( df1, df2, by = "f", relationship = "many-to-many")), c(0,4,0))
})

test_that("group column names reflect renamed duplicate columns (#2330)", {
Expand Down
2 changes: 1 addition & 1 deletion tests/testthat/test-mutate.R
Original file line number Diff line number Diff line change
Expand Up @@ -423,7 +423,7 @@ test_that("mutate keeps zero length groups", {
)
df <- duckplyr_group_by(df, e, f, g, .drop = FALSE)

expect_equal( group_size(duckplyr_mutate(df, z = 2)), c(2, 2, 0) )
expect_equal( duckplyr_group_size(duckplyr_mutate(df, z = 2)), c(2, 2, 0) )
})

# other -------------------------------------------------------------------
Expand Down
2 changes: 1 addition & 1 deletion tests/testthat/test-slice.R
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,7 @@ test_that("slice keeps zero length groups", {
)
df <- duckplyr_group_by(df, e, f, g, .drop = FALSE)

expect_equal(group_size(duckplyr_slice(df, 1)), c(1, 1, 0) )
expect_equal(duckplyr_group_size(duckplyr_slice(df, 1)), c(1, 1, 0) )
})

test_that("slicing retains labels for zero length groups", {
Expand Down
4 changes: 3 additions & 1 deletion tools/00-funs.R
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ df_methods <-
filter(!grepl("_$|^as[.]tbl$", name)) %>%
# special dplyr methods, won't implement
filter(!(name %in% c(
"group_indices", "group_nest", "group_size", "group_split", "group_trim", "groups", "n_groups",
"group_indices", "group_nest", "group_split", "group_trim", "groups", "n_groups",
"same_src", # data frames can be copied into duck-frames with zero cost
NULL
))) %>%
Expand All @@ -29,6 +29,7 @@ df_methods <-
"group_keys",
"group_map",
"group_modify",
"group_size",
"rowwise",
NULL
))) %>%
Expand Down Expand Up @@ -700,6 +701,7 @@ test_skip_map <- c(
group_map = "WAT",
group_modify = "Grouped",
group_nest = "Always returns tibble",
group_size = "Special",
group_split = "WAT",
group_trim = "Grouped",
nest_by = "WAT",
Expand Down

0 comments on commit 674690c

Please sign in to comment.