Skip to content

Commit

Permalink
feat: n_distinct() supports na.rm = TRUE with a single vector arg…
Browse files Browse the repository at this point in the history
…ument again (#216)
  • Loading branch information
krlmlr authored Sep 14, 2024
2 parents 75f0a44 + e66fcfe commit 698fc54
Show file tree
Hide file tree
Showing 2 changed files with 27 additions and 6 deletions.
19 changes: 14 additions & 5 deletions R/backend-dbplyr__duckdb_connection.R
Original file line number Diff line number Diff line change
Expand Up @@ -78,13 +78,22 @@ duckdb_n_distinct <- function(..., na.rm = FALSE) {
check_dots_unnamed()

if (!identical(na.rm, FALSE)) {
stop("Parameter `na.rm = TRUE` in n_distinct() is currently not supported in DuckDB backend.", call. = FALSE)
}
cols <- list(...)

# check for more than one vector argument: only one vector is supported
# Why not use ROW() as well? Because duckdb's FILTER clause does not support
# a windowing context as of now: https://duckdb.org/docs/sql/query_syntax/filter.html
if (length(cols) > 1) {
stop("n_distinct(): Only one vector argument is currently supported when `na.rm = TRUE`.", call. = FALSE)
}

# https://duckdb.org/docs/sql/data_types/struct.html#creating-structs-with-the-row-function
str_struct <- paste0("row(", paste0(list(...), collapse = ", "), ")")
return(sql(paste0("COUNT(DISTINCT ", cols[[1]], ")")))
} else {
# https://duckdb.org/docs/sql/data_types/struct.html#creating-structs-with-the-row-function
str_struct <- paste0("row(", paste0(list(...), collapse = ", "), ")")

sql(paste0("COUNT(DISTINCT ", str_struct, ")"))
return(sql(paste0("COUNT(DISTINCT ", str_struct, ")")))
}
}

# Customized translation functions for DuckDB SQL
Expand Down
14 changes: 13 additions & 1 deletion tests/testthat/test-backend-dbplyr__duckdb_connection.R
Original file line number Diff line number Diff line change
Expand Up @@ -281,6 +281,9 @@ test_that("aggregators translated correctly", {
expect_equal(translate(n_distinct(x), window = FALSE), sql(r"{COUNT(DISTINCT row(x))}"))
expect_equal(translate(n_distinct(x), window = TRUE), sql(r"{COUNT(DISTINCT row(x)) OVER ()}"))
expect_equal(translate(n_distinct(x), window = TRUE, vars_group = "y"), sql(r"{COUNT(DISTINCT row(x)) OVER (PARTITION BY y)}"))
expect_equal(translate(n_distinct(x, na.rm = TRUE), window = FALSE), sql(r"{COUNT(DISTINCT x)}"))
expect_equal(translate(n_distinct(x, na.rm = TRUE), window = TRUE), sql(r"{COUNT(DISTINCT x) OVER ()}"))
expect_equal(translate(n_distinct(x, na.rm = TRUE), window = TRUE, vars_group = "y"), sql(r"{COUNT(DISTINCT x) OVER (PARTITION BY y)}"))
})

test_that("two variable aggregates are translated correctly", {
Expand Down Expand Up @@ -317,8 +320,17 @@ test_that("n_distinct() computations are correct", {
df <- tbl(con, "df")
df_na <- tbl(con, "df_na")

expect_equal(
pull(summarize(df, n = n_distinct(x, na.rm = TRUE)), n),
2
)
expect_equal(
pull(summarize(df_na, n = n_distinct(x, na.rm = TRUE)), n),
2
)

expect_error(
pull(summarize(df, n = n_distinct(x, na.rm = TRUE)), n)
pull(summarize(df, n = n_distinct(x, y, na.rm = TRUE)), n)
)

# single column is working as usual
Expand Down

0 comments on commit 698fc54

Please sign in to comment.