From 85f70e3bf64ea3fc7350506c33967f309e13bde8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kirill=20M=C3=BCller?= Date: Sat, 9 Sep 2023 20:40:16 +0200 Subject: [PATCH 1/8] Sync documentation with v0.1.0 --- .Rbuildignore | 4 + .aspell/defaults.R | 4 + .aspell/duckplyr.rds | Bin 0 -> 79 bytes .github/.gitignore | 1 + .gitignore | 1 + DESCRIPTION | 10 +- NAMESPACE | 21 +- NEWS.md | 28 ++ R/relational-expr.R | 54 +++- R/relational-rel.R | 338 ++++++++++++------------ README.Rmd | 112 +++++++- README.md | 215 ++++++++++++--- _pkgdown.yml | 4 + cran-comments.md | 3 + man/duckplyr-package.Rd | 8 +- man/expr.Rd | 41 ++- man/methods_overwrite.Rd | 11 - man/methods_restore.Rd | 11 - man/new_relational.Rd | 16 -- man/rel.Rd | 19 -- man/rel_aggregate.Rd | 28 -- man/rel_alias.Rd | 20 -- man/rel_distinct.Rd | 23 -- man/rel_explain.Rd | 20 -- man/rel_filter.Rd | 32 --- man/rel_join.Rd | 47 ---- man/rel_limit.Rd | 18 -- man/rel_order.Rd | 25 -- man/rel_project.Rd | 25 -- man/rel_set_alias.Rd | 22 -- man/rel_set_diff.Rd | 25 -- man/rel_set_intersect.Rd | 25 -- man/rel_set_symdiff.Rd | 25 -- man/rel_to_df.Rd | 31 --- man/rel_union_all.Rd | 25 -- man/relational.Rd | 233 ++++++++++++++++ tests/testthat/_snaps/relational-rel.md | 9 + tests/testthat/test-relational-rel.R | 5 + tools/spelling.R | 6 + 39 files changed, 853 insertions(+), 692 deletions(-) create mode 100644 .aspell/defaults.R create mode 100644 .aspell/duckplyr.rds create mode 100644 NEWS.md create mode 100644 _pkgdown.yml create mode 100644 cran-comments.md delete mode 100644 man/methods_overwrite.Rd delete mode 100644 man/methods_restore.Rd delete mode 100644 man/new_relational.Rd delete mode 100644 man/rel.Rd delete mode 100644 man/rel_aggregate.Rd delete mode 100644 man/rel_alias.Rd delete mode 100644 man/rel_distinct.Rd delete mode 100644 man/rel_explain.Rd delete mode 100644 man/rel_filter.Rd delete mode 100644 man/rel_join.Rd delete mode 100644 man/rel_limit.Rd delete mode 100644 man/rel_order.Rd delete mode 100644 
man/rel_project.Rd delete mode 100644 man/rel_set_alias.Rd delete mode 100644 man/rel_set_diff.Rd delete mode 100644 man/rel_set_intersect.Rd delete mode 100644 man/rel_set_symdiff.Rd delete mode 100644 man/rel_to_df.Rd delete mode 100644 man/rel_union_all.Rd create mode 100644 man/relational.Rd create mode 100644 tests/testthat/_snaps/relational-rel.md create mode 100644 tests/testthat/test-relational-rel.R create mode 100644 tools/spelling.R diff --git a/.Rbuildignore b/.Rbuildignore index 7422e3ea..b88e12b8 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -16,3 +16,7 @@ ^R/meta\.R$ ^R/tpch_raw_(?:oo_)?[0-9]+\.R$ ^.ccache$ +^_pkgdown\.yml$ +^docs$ +^pkgdown$ +^cran-comments\.md$ diff --git a/.aspell/defaults.R b/.aspell/defaults.R new file mode 100644 index 00000000..a1d6c8d9 --- /dev/null +++ b/.aspell/defaults.R @@ -0,0 +1,4 @@ +Rd_files <- vignettes <- R_files <- description <- + list(encoding = "UTF-8", + language = "en", + dictionaries = c("en_stats", "duckplyr")) diff --git a/.aspell/duckplyr.rds b/.aspell/duckplyr.rds new file mode 100644 index 0000000000000000000000000000000000000000..b21057a7580e8f36eabcfd7d8743a2c754ddb12f GIT binary patch literal 79 zcmb2|=3oE==I#ec2?+^F35iUT&N!$wGYD*EP*k6g=Bs&PrA|}P4&!3(qM+j7&t5@^ fPC0r@Pu|q?3y`ZWOwIF3i;`m4y7v0iV4yhw<_8-K literal 0 HcmV?d00001 diff --git a/.github/.gitignore b/.github/.gitignore index 6917e2d7..17c2aa70 100644 --- a/.github/.gitignore +++ b/.github/.gitignore @@ -1 +1,2 @@ /pkg.lock +*.html diff --git a/.gitignore b/.gitignore index ff13cbd4..7b6d3658 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,3 @@ .Rproj.user /.ccache/ +/docs diff --git a/DESCRIPTION b/DESCRIPTION index dcad110b..82297393 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -3,13 +3,15 @@ Package: duckplyr Title: A DuckDB-backed version of dplyr Version: 0.0.1 Authors@R: c( - person("Hannes", "Mühleisen", , "hannes@duckdblabs.com", role = c("aut", "cre"), + person("Hannes", "Mühleisen", role = "aut", comment = c(ORCID = 
"0000-0001-8552-0029")), - person("Kirill", "Müller", role = "aut", + person("Kirill", "Müller", , "kirill@cynkra.com", role = c("aut", "cre"), comment = c(ORCID = "0000-0002-1416-3412")), - person("Posit", role = c("cph", "fnd")) + person("Posit Software, PBC", role = c("cph", "fnd")) ) -Description: Implements a subset of dplyr using DuckDB. +Description: A drop-in replacement for dplyr, powered by DuckDB for performance. + Also defines a set of generics that provide a low-level + implementer's interface for the high-level user interface of dplyr. License: MIT + file LICENSE URL: https://github.com/duckdblabs/duckplyr BugReports: https://github.com/duckdblabs/duckplyr/issues diff --git a/NAMESPACE b/NAMESPACE index b7005f6f..53fd4df3 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -12,7 +12,6 @@ S3method(distinct,duckplyr_df) S3method(do,duckplyr_df) S3method(dplyr_reconstruct,duckplyr_df) S3method(explain,duckplyr_df) -S3method(format,relational_relexpr) S3method(full_join,duckplyr_df) S3method(group_vars,duckplyr_df) S3method(head,duckplyr_df) @@ -27,12 +26,8 @@ S3method(pull,duckplyr_df) S3method(reframe,duckplyr_df) S3method(rel_aggregate,duckdb_relation) S3method(rel_aggregate,relational_df) -S3method(rel_alias,duckdb_relation) -S3method(rel_alias,relational_df) S3method(rel_distinct,duckdb_relation) S3method(rel_distinct,relational_df) -S3method(rel_explain,duckdb_relation) -S3method(rel_explain,relational_df) S3method(rel_filter,duckdb_relation) S3method(rel_filter,relational_df) S3method(rel_join,duckdb_relation) @@ -45,8 +40,6 @@ S3method(rel_order,duckdb_relation) S3method(rel_order,relational_df) S3method(rel_project,duckdb_relation) S3method(rel_project,relational_df) -S3method(rel_set_alias,duckdb_relation) -S3method(rel_set_alias,relational_df) S3method(rel_set_diff,duckdb_relation) S3method(rel_set_diff,relational_df) S3method(rel_set_intersect,duckdb_relation) @@ -55,8 +48,6 @@ S3method(rel_set_symdiff,duckdb_relation) 
S3method(rel_set_symdiff,relational_df) S3method(rel_to_df,duckdb_relation) S3method(rel_to_df,relational_df) -S3method(rel_tostring,duckdb_relation) -S3method(rel_tostring,relational_df) S3method(rel_union_all,duckdb_relation) S3method(rel_union_all,relational_df) S3method(relocate,duckplyr_df) @@ -94,9 +85,11 @@ export(methods_restore) export(new_relational) export(new_relexpr) export(rel_aggregate) -export(rel_alias) +export(rel_alias.duckdb_relation) +export(rel_alias.relational_df) export(rel_distinct) -export(rel_explain) +export(rel_explain.duckdb_relation) +export(rel_explain.relational_df) export(rel_filter) export(rel_from_df) export(rel_join) @@ -104,12 +97,14 @@ export(rel_limit) export(rel_names) export(rel_order) export(rel_project) -export(rel_set_alias) +export(rel_set_alias.duckdb_relation) +export(rel_set_alias.relational_df) export(rel_set_diff) export(rel_set_intersect) export(rel_set_symdiff) export(rel_to_df) -export(rel_tostring) +export(rel_tostring.duckdb_relation) +export(rel_tostring.relational_df) export(rel_union_all) export(relexpr_constant) export(relexpr_function) diff --git a/NEWS.md b/NEWS.md new file mode 100644 index 00000000..3c158490 --- /dev/null +++ b/NEWS.md @@ -0,0 +1,28 @@ + + +# duckplyr 0.1.0 (2023-07-03) + +## Bug fixes + +- Fix examples. + +## Chore + +- Add CRAN install instructions. +- Satisfy `R CMD check`. +- Document argument. +- Error on NOTE. +- Remove `relexpr_window()` for now. + +## Documentation + +- Clean up reference. 
+ +## Uncategorized + +Initial version, exporting: +- `new_relational()` to construct objects of class `"relational"` +- Generics `rel_aggregate()`, `rel_distinct()`, `rel_filter()`, `rel_join()`, `rel_limit()`, `rel_names()`, `rel_order()`, `rel_project()`, `rel_set_diff()`, `rel_set_intersect()`, `rel_set_symdiff()`, `rel_to_df()`, `rel_union_all()` +- `new_relexpr()` to construct objects of class `"relational_relexpr"` +- Expression builders `relexpr_constant()`, `relexpr_function()`, `relexpr_reference()`, `relexpr_set_alias()`, `relexpr_window()` + diff --git a/R/relational-expr.R b/R/relational-expr.R index be5abf59..4a7734bb 100644 --- a/R/relational-expr.R +++ b/R/relational-expr.R @@ -1,20 +1,42 @@ #' Relational expressions #' -#' TBD. +#' @description +#' These functions provide a backend-agnostic way to construct expression trees +#' built of column references, constants, and functions. +#' All subexpressions in an expression tree can have an alias. +#' +#' `new_relexpr()` constructs an object of class `"relational_relexpr"`. +#' It is used by the higher-level constructors, +#' users should rarely need to call it directly. #' #' @param x An object. #' @param class Classes added in front of the `"relational_relexpr"` base class. #' #' @name expr +#' @return an object of class `"relational_relexpr"` #' @export +#' @examples +#' relexpr_set_alias(alias = "my_predicate", +#' relexpr_function("<", +#' list( +#' relexpr_reference("my_number"), +#' relexpr_constant(42) +#' ) +#' ) +#' ) new_relexpr <- function(x, class = NULL) { structure(x, class = unique(c(class, "relational_relexpr"))) } +#' relexpr_reference +#' +#' `relexpr_reference()` constructs a reference to a column. +#' #' @param name The name of the column or function to reference. #' @param rel The name of the relation to reference. #' @param alias An alias for the new expression. 
#' @rdname expr +#' @return an object of class `"relational_relexpr"` #' @export relexpr_reference <- function(name, rel = NULL, alias = NULL) { stopifnot(is_string(name)) @@ -23,8 +45,13 @@ relexpr_reference <- function(name, rel = NULL, alias = NULL) { new_relexpr(list(name = name, rel = rel, alias = alias), class = "relational_relexpr_reference") } +#' relexpr_constant +#' +#' `relexpr_constant()` wraps a constant value. +#' #' @param val The value to use in the constant expression. #' @rdname expr +#' @return an object of class `"relational_relexpr"` #' @export relexpr_constant <- function(val, alias = NULL) { stopifnot(length(val) == 1) @@ -32,8 +59,14 @@ relexpr_constant <- function(val, alias = NULL) { new_relexpr(list(val = val, alias = alias), class = "relational_relexpr_constant") } +#' relexpr_function +#' +#' `relexpr_function()` applies a function. +#' The arguments to this function are a list of other expression objects. +#' #' @param args Function arguments, a list of `expr` objects. #' @rdname expr +#' @return an object of class `"relational_relexpr"` #' @export relexpr_function <- function(name, args, alias = NULL) { stopifnot(is_string(name)) @@ -42,6 +75,11 @@ relexpr_function <- function(name, args, alias = NULL) { new_relexpr(list(name = name, args = args, alias = alias), class = "relational_relexpr_function") } +#' relexpr_window +#' +#' `relexpr_window()` applies a function over a window, +#' similarly to the SQL `OVER` clause. +#' #' @param partitions Partitions, a list of `expr` objects. #' @param order_bys which variables to order results by (list). #' @param offset_expr offset relational expression. @@ -75,8 +113,13 @@ relexpr_window <- function( ) } +#' relexpr_set_alias +#' +#' `relexpr_set_alias()` assigns an alias to an expression. +#' #' @param expr An `expr` object. 
#' @rdname expr +#' @return an object of class `"relational_relexpr"` #' @export relexpr_set_alias <- function(expr, alias = NULL) { stopifnot(inherits(expr, "relational_relexpr")) @@ -87,11 +130,6 @@ relexpr_set_alias <- function(expr, alias = NULL) { #' @export print.relational_relexpr <- function(x, ...) { - writeLines(format(x, ...)) -} - -#' @export -format.relational_relexpr <- function(x, ...) { - # FIXME: Use home-grown code - utils::capture.output(print(constructive::construct(x))) + utils::str(x) + invisible(x) } diff --git a/R/relational-rel.R b/R/relational-rel.R index 9c1c9111..fd3b8b14 100644 --- a/R/relational-rel.R +++ b/R/relational-rel.R @@ -8,58 +8,85 @@ rel_stats_get <- function() { arrange(tibble::enframe(unlist(as.list(rel_stats_env)), "fun", "count"), desc(count)) } -#' Relational API -#' -#' TBD. -#' -#' @param ... Passed on to [structure()] -#' @param class Classes added in front of the `"relational"` base class -#' +#' Relational implementer's interface +#' +#' @description +#' The constructor and generics described here define a class +#' that helps separating dplyr's user interface from the actual underlying operations. +#' In the longer term, this will help packages that implement the dplyr interface +#' (such as \pkg{dbplyr}, \pkg{dtplyr}, \pkg{arrow} and similar) +#' to focus on the core details of their functionality, +#' rather than on the intricacies of dplyr's user interface. +#' +#' `new_relational()` constructs an object of class `"relational"`. +#' Users are encouraged to provide the `class` argument. +#' The typical use case will be to create a wrapper function. +#' +#' @param ... Passed on to [structure()]. +#' @param class Classes added in front of the `"relational"` base class. +#' +#' @return +#' - `new_relational()` returns a new relational object. +#' - `rel_to_df()` returns a data frame. +#' - `rel_names()` returns a character vector. +#' - All other generics return a modified relational object. 
+#' @name relational #' @export +#' @examples +#' new_dfrel <- function(x) { +#' stopifnot(is.data.frame(x)) +#' new_relational(list(x), class = "dfrel") +#' } +#' mtcars_rel <- new_dfrel(mtcars[1:5, 1:4]) new_relational <- function(..., class = NULL) { structure(..., class = unique(c(class, "relational"))) } -#' Convert a relation object to a data frame +#' rel_to_df() #' -#' TBD. +#' `rel_to_df()` extracts a data frame representation from a relational object, +#' to be used by [dplyr::collect()]. #' -#' @param rel The relation object. +#' @param rel,rel_a,rel_b,left,right A relational object. #' @param ... Reserved for future extensions, must be empty. -#' @return A data frame. +#' @rdname relational #' @export #' @examples -#' rel <- rel_from_df(mtcars) -#' rel2 <- rel_filter( -#' rel, -#' list( -#' relexpr_function( -#' "gt", -#' list(relexpr_reference("cyl"), relexpr_constant("6")) -#' ) -#' ) -#' ) +#' +#' rel_to_df.dfrel <- function(rel, ...) { +#' unclass(rel)[[1]] +#' } +#' rel_to_df(mtcars_rel) rel_to_df <- function(rel, ...) { rel_stats_env$rel_to_df <- (rel_stats_env$rel_to_df %||% 0L) + 1L UseMethod("rel_to_df") } -#' Lazily filter a relation object +#' rel_filter #' -#' TBD. +#' `rel_filter()` keeps rows that match a predicate, +#' to be used by [dplyr::filter()]. #' -#' @inheritParams rel_to_df -#' @param exprs a list of DuckDB expressions to filter by -#' @return the now filtered relation object +#' @param exprs A list of [expr] objects to filter by. +#' @rdname relational #' @export -#' @examples -#' rel <- rel_from_df(mtcars) -#' rel2 <- rel_filter( -#' rel, +#' @examplesIf { set.seed(20230630); TRUE } +#' +#' rel_filter.dfrel <- function(rel, exprs, ...) 
{ +#' df <- unclass(rel)[[1]] +#' +#' # A real implementation would evaluate the predicates defined +#' # by the exprs argument +#' new_dfrel(df[sample.int(nrow(df), 3, replace = TRUE), ]) +#' } +#' +#' rel_filter( +#' mtcars_rel, #' list( #' relexpr_function( #' "gt", -#' list(relexpr_reference("cyl"), relexpr_constant("6"))) +#' list(relexpr_reference("cyl"), relexpr_constant("6")) +#' ) #' ) #' ) rel_filter <- function(rel, exprs, ...) { @@ -67,239 +94,214 @@ rel_filter <- function(rel, exprs, ...) { UseMethod("rel_filter") } -#' Lazily project a relation object +#' rel_project #' -#' TBD. +#' `rel_project()` selects columns or creates new columns, +#' to be used by [dplyr::select()], [dplyr::rename()], +#' [dplyr::mutate()], [dplyr::relocate()], and others. #' -#' @inheritParams rel_to_df -#' @param exprs a list of DuckDB expressions to project -#' @return the now projected relation object +#' @rdname relational #' @export #' @examples -#' rel <- rel_from_df(mtcars) -#' rel2 <- rel_project(rel, list(relexpr_reference("cyl"), relexpr_reference("disp"))) +#' +#' rel_project.dfrel <- function(rel, exprs, ...) { +#' df <- unclass(rel)[[1]] +#' +#' # A real implementation would evaluate the expressions defined +#' # by the exprs argument +#' new_dfrel(df[seq_len(min(3, ncol(df)))]) +#' } +#' +#' rel_project( +#' mtcars_rel, +#' list(relexpr_reference("cyl"), relexpr_reference("disp")) +#' ) rel_project <- function(rel, exprs, ...) { rel_stats_env$rel_project <- (rel_stats_env$rel_project %||% 0L) + 1L UseMethod("rel_project") } -#' Lazily aggregate a relation object +#' rel_aggregate #' -#' TBD. +#' `rel_aggregate()` combines several rows into one, +#' to be used by [dplyr::summarize()]. 
#' -#' @inheritParams rel_to_df -#' @param groups a list of DuckDB expressions to group by -#' @param aggregates a (optionally named) list of DuckDB expressions with aggregates to compute -#' @return the now aggregated relation object +#' @param groups A list of expressions to group by. +#' @param aggregates A list of expressions with aggregates to compute. +#' @rdname relational #' @export -#' @examples -#' rel <- rel_from_df(mtcars) -#' aggrs <- list(avg_hp = relexpr_function("avg", list(relexpr_reference("hp")))) -#' rel2 <- rel_aggregate(rel, list(relexpr_reference("cyl")), aggrs) rel_aggregate <- function(rel, groups, aggregates, ...) { rel_stats_env$rel_aggregate <- (rel_stats_env$rel_aggregate %||% 0L) + 1L UseMethod("rel_aggregate") } -#' Lazily reorder a relation object +#' rel_order #' -#' TBD. +#' `rel_order()` reorders rows by columns or expressions, +#' to be used by [dplyr::arrange()]. #' -#' @inheritParams rel_to_df -#' @param orders a list of DuckDB expressions to order by -#' @return the now aggregated relation object +#' @param orders A list of expressions to order by. +#' @rdname relational #' @export #' @examples -#' rel <- rel_from_df(mtcars) -#' rel2 <- rel_order(rel, list(relexpr_reference("hp"))) +#' +#' rel_order.dfrel <- function(rel, exprs, ...) { +#' df <- unclass(rel)[[1]] +#' +#' # A real implementation would evaluate the expressions defined +#' # by the exprs argument +#' new_dfrel(df[order(df[[1]]), ]) +#' } +#' +#' rel_order( +#' mtcars_rel, +#' list(relexpr_reference("mpg")) +#' ) rel_order <- function(rel, orders, ...) { rel_stats_env$rel_order <- (rel_stats_env$rel_order %||% 0L) + 1L UseMethod("rel_order") } -#' Lazily INNER join two relation objects +#' rel_join #' -#' TBD. +#' `rel_join()` joins or merges two tables, +#' to be used by [dplyr::left_join()], [dplyr::right_join()], +#' [dplyr::inner_join()], [dplyr::full_join()], [dplyr::cross_join()], +#' [dplyr::semi_join()], and [dplyr::anti_join()]. 
#' -#' @inheritParams rel_to_df -#' @param left the left-hand-side relation object -#' @param right the right-hand-side relation object -#' @param conds a list of DuckDB expressions to use for the join -#' @param join type of join -#' @param join_ref_type ref type of join -#' @return a new relation object resulting from the join +#' @param conds A list of expressions to use for the join. +#' @param join The type of join. +#' @rdname relational #' @export -#' @examples -#' \dontrun{ -#' left <- rel_from_df(mtcars) -#' right <- rel_from_df(mtcars) -#' cond <- list( -#' relexpr_function( -#' "eq", -#' list(relexpr_reference("cyl", left), relexpr_reference("cyl", right)) -#' ) -#' ) -#' rel2 <- rel_join(left, right, cond) +#' @examplesIf requireNamespace("dplyr", quietly = TRUE) +#' rel_join.dfrel <- function(left, right, conds, join, ...) { +#' left_df <- unclass(left)[[1]] +#' right_df <- unclass(right)[[1]] +#' +#' # A real implementation would evaluate the expressions +#' # defined by the conds argument, +#' # use different join types based on the join argument, +#' # and implement the join itself instead of relaying to left_join(). +#' new_dfrel(dplyr::left_join(left_df, right_df)) #' } +#' +#' rel_join(new_dfrel(data.frame(mpg = 21)), mtcars_rel) rel_join <- function(left, right, conds, join = c("inner", "left", "right", "outer", "cross", "semi", "anti"), - join_ref_type = c("regular", "natural", "cross", "positional", "asof"), ...) { rel_stats_env$rel_join <- (rel_stats_env$rel_join %||% 0L) + 1L UseMethod("rel_join") } -#' Lazily limit the rows in a relation object +#' rel_limit #' -#' TBD. +#' `rel_limit()` limits the number of rows in a table, +#' to be used by [utils::head()]. #' -#' @inheritParams rel_to_df #' @param n The number of rows. +#' @rdname relational #' @export +#' @examples +#' +#' rel_limit.dfrel <- function(rel, n, ...) 
{ +#' df <- unclass(rel)[[1]] +#' +#' new_dfrel(df[seq_len(n), ]) +#' } +#' +#' rel_limit(mtcars_rel, 3) rel_limit <- function(rel, n, ...) { rel_stats_env$rel_limit <- (rel_stats_env$rel_limit %||% 0L) + 1L UseMethod("rel_limit") } -#' Lazily compute a distinct result on a relation object +#' rel_distinct() #' -#' TBD. +#' `rel_distinct()` only keeps the distinct rows in a table, +#' to be used by [dplyr::distinct()]. #' -#' @inheritParams rel_to_df -#' @return a new relation object with distinct rows +#' @rdname relational #' @export #' @examples -#' rel <- rel_from_df(mtcars) -#' rel2 <- rel_distinct(rel) +#' +#' rel_distinct.dfrel <- function(rel, ...) { +#' df <- unclass(rel)[[1]] +#' +#' new_dfrel(df[!duplicated(df), ]) +#' } +#' +#' rel_distinct(new_dfrel(mtcars[1:3, 1:4])) rel_distinct <- function(rel, ...) { rel_stats_env$rel_distinct <- (rel_stats_env$rel_distinct %||% 0L) + 1L UseMethod("rel_distinct") } -#' Lazily compute a set_intersect result on a relation object +#' rel_set_intersect() #' -#' TBD. +#' `rel_set_intersect()` returns rows present in both tables, +#' to be used by [intersect()]. #' -#' @inheritParams rel_to_df -#' @inheritParams rel_set_diff -#' @return a new relation object with the result +#' @rdname relational #' @export -#' @examples -#' rel <- rel_from_df(mtcars) -#' rel2 <- rel_set_intersect(rel) rel_set_intersect <- function(rel_a, rel_b, ...) { rel_stats_env$rel_set_intersect <- (rel_stats_env$rel_set_intersect %||% 0L) + 1L UseMethod("rel_set_intersect") } -#' Lazily compute a set_diff result on a relation object +#' rel_set_diff() #' -#' TBD. +#' `rel_set_diff()` returns rows present in `rel_a` but not in `rel_b`, +#' to be used by [setdiff()]. 
#' -#' @inheritParams rel_to_df -#' @param rel_a a DuckDB relation object -#' @param rel_b a DuckDB relation object -#' @return a new relation object with the result +#' @rdname relational #' @export -#' @examples -#' rel <- rel_from_df(mtcars) -#' rel2 <- rel_set_diff(rel) rel_set_diff <- function(rel_a, rel_b, ...) { rel_stats_env$rel_set_diff <- (rel_stats_env$rel_set_diff %||% 0L) + 1L UseMethod("rel_set_diff") } -#' Lazily compute a set_symdiff result on a relation object +#' rel_set_symdiff() #' -#' TBD. +#' `rel_set_symdiff()` returns rows present in exactly one of the two tables, +#' to be used by [dplyr::symdiff()]. #' -#' @inheritParams rel_to_df -#' @inheritParams rel_set_diff -#' @return a new relation object with the result +#' @rdname relational #' @export -#' @examples -#' rel <- rel_from_df(mtcars) -#' rel2 <- rel_set_symdiff(rel) rel_set_symdiff <- function(rel_a, rel_b, ...) { rel_stats_env$rel_set_symdiff <- (rel_stats_env$rel_set_symdiff %||% 0L) + 1L UseMethod("rel_set_symdiff") } -#' Lazily compute a set_union_all result on a relation object +#' rel_union_all() #' -#' TBD. +#' `rel_union_all()` returns all rows from both tables, keeping duplicates, +#' to be used by [dplyr::union_all()]. #' -#' @inheritParams rel_to_df -#' @inheritParams rel_set_diff -#' @return a new relation object with the result +#' @rdname relational #' @export -#' @examples -#' rel <- rel_from_df(mtcars) -#' rel2 <- rel_union_all(rel) rel_union_all <- function(rel_a, rel_b, ...) { rel_stats_env$rel_union_all <- (rel_stats_env$rel_union_all %||% 0L) + 1L UseMethod("rel_union_all") } -#' TBD -#' -#' TBD. -#' -#' @inheritParams rel_to_df -#' @rdname rel -#' @export -rel_tostring <- function(rel, ...) { - rel_stats_env$rel_tostring <- (rel_stats_env$rel_tostring %||% 0L) + 1L - UseMethod("rel_tostring") -} - -#' Print the EXPLAIN output for a relation object +#' rel_names() #' -#' TBD. +#' `rel_names()` returns the column names as a character vector, +#' to be used by [colnames()]. 
#' -#' @inheritParams rel_to_df +#' @rdname relational #' @export #' @examples -#' rel <- rel_from_df(mtcars) -#' rel_explain(rel) -rel_explain <- function(rel, ...) { - rel_stats_env$rel_explain <- (rel_stats_env$rel_explain %||% 0L) + 1L - UseMethod("rel_explain") -} - -#' Get the internal alias for a relation object -#' -#' TBD. #' -#' @inheritParams rel_to_df -#' @export -#' @examples -#' rel <- rel_from_df(mtcars) -#' rel_alias(rel) -rel_alias <- function(rel, ...) { - rel_stats_env$rel_alias <- (rel_stats_env$rel_alias %||% 0L) + 1L - UseMethod("rel_alias") -} - -#' Set the internal alias for a relation object +#' rel_names.dfrel <- function(rel, ...) { +#' df <- unclass(rel)[[1]] #' -#' TBD. +#' names(df) +#' } #' -#' @inheritParams rel_to_df -#' @param alias the new alias -#' @export -#' @examples -#' rel <- rel_from_df(mtcars) -#' rel_set_alias(rel, "my_new_alias") -rel_set_alias <- function(rel, alias, ...) { - rel_stats_env$rel_set_alias <- (rel_stats_env$rel_set_alias %||% 0L) + 1L - UseMethod("rel_set_alias") -} - -#' @rdname rel -#' @export +#' rel_names(mtcars_rel) rel_names <- function(rel, ...) { rel_stats_env$rel_names <- (rel_stats_env$rel_names %||% 0L) + 1L UseMethod("rel_names") diff --git a/README.Rmd b/README.Rmd index 8eeb4932..846c532f 100644 --- a/README.Rmd +++ b/README.Rmd @@ -13,6 +13,8 @@ knitr::opts_chunk$set( ) pkgload::load_all() + +set.seed(20230702) ``` # duckplyr @@ -20,11 +22,18 @@ pkgload::load_all() -The goal of duckplyr is to ... +The goal of duckplyr is to provide a drop-in replacement for dplyr that uses DuckDB as a backend for fast operation. +It also defines a set of generics that provide a low-level implementer's interface for dplyr's high-level user interface. 
## Installation -You can install the development version of duckplyr from [GitHub](https://github.com/) with: +Once on CRAN, you can install duckplyr with: + +``` r +install.packages("duckplyr") +``` + +You can also install the development version of duckplyr from [GitHub](https://github.com/) with: ``` r # install.packages("pak", repos = sprintf("https://r-lib.github.io/p/pak/stable/%s/%s/%s", .Platform$pkgType, R.Version()$os, R.Version()$arch)) @@ -61,3 +70,102 @@ out # Once computed, the results remain available as a data frame: out ``` + +## Extensibility + +This package only provides generics, for which other packages may then implement methods. + +```{r extensibility} +library(duckplyr) + +new_dfrel <- function(x) { + stopifnot(is.data.frame(x)) + new_relational(list(x), class = "dfrel") +} +mtcars_rel <- new_dfrel(mtcars[1:5, 1:4]) + +rel_to_df.dfrel <- function(rel, ...) { + unclass(rel)[[1]] +} +rel_to_df(mtcars_rel) + +rel_filter.dfrel <- function(rel, exprs, ...) { + df <- unclass(rel)[[1]] + + # A real implementation would evaluate the predicates defined + # by the exprs argument + new_dfrel(df[sample.int(nrow(df), 3, replace = TRUE), ]) +} + +rel_filter( + mtcars_rel, + list( + relexpr_function( + "gt", + list(relexpr_reference("cyl"), relexpr_constant("6")) + ) + ) +) + +rel_project.dfrel <- function(rel, exprs, ...) { + df <- unclass(rel)[[1]] + + # A real implementation would evaluate the expressions defined + # by the exprs argument + new_dfrel(df[seq_len(min(3, ncol(df)))]) +} + +rel_project( + mtcars_rel, + list(relexpr_reference("cyl"), relexpr_reference("disp")) +) + +rel_order.dfrel <- function(rel, exprs, ...) { + df <- unclass(rel)[[1]] + + # A real implementation would evaluate the expressions defined + # by the exprs argument + new_dfrel(df[order(df[[1]]), ]) +} + +rel_order( + mtcars_rel, + list(relexpr_reference("mpg")) +) +rel_join.dfrel <- function(left, right, conds, join, ...) 
{ + left_df <- unclass(left)[[1]] + right_df <- unclass(right)[[1]] + + # A real implementation would evaluate the expressions + # defined by the conds argument, + # use different join types based on the join argument, + # and implement the join itself instead of relaying to left_join(). + new_dfrel(dplyr::left_join(left_df, right_df)) +} + +rel_join(new_dfrel(data.frame(mpg = 21)), mtcars_rel) + +rel_limit.dfrel <- function(rel, n, ...) { + df <- unclass(rel)[[1]] + + new_dfrel(df[seq_len(n), ]) +} + +rel_limit(mtcars_rel, 3) + +rel_distinct.dfrel <- function(rel, ...) { + df <- unclass(rel)[[1]] + + new_dfrel(df[!duplicated(df), ]) +} + +rel_distinct(new_dfrel(mtcars[1:3, 1:4])) + +rel_names.dfrel <- function(rel, ...) { + df <- unclass(rel)[[1]] + + names(df) +} + +rel_names(mtcars_rel) +``` diff --git a/README.md b/README.md index 0f2c73c8..41dec4a7 100644 --- a/README.md +++ b/README.md @@ -6,15 +6,20 @@ -The goal of duckplyr is to … +The goal of duckplyr is to provide a drop-in replacement for dplyr that uses DuckDB as a backend for fast operation. It also defines a set of generics that provide a low-level implementer’s interface for dplyr’s high-level user interface. ## Installation -You can install the development version of duckplyr from [GitHub](https://github.com/) with: +Once on CRAN, you can install duckplyr with: + +
+install.packages("duckplyr")
+ +You can also install the development version of duckplyr from [GitHub](https://github.com/) with:
 # install.packages("pak", repos = sprintf("https://r-lib.github.io/p/pak/stable/%s/%s/%s", .Platform$pkgType, R.Version()$os, R.Version()$arch))
-pak::pak("duckdblabs/duckplyr")
+pak::pak("duckdblabs/duckplyr") ## Example @@ -26,9 +31,9 @@ This is a basic example which shows you how to solve a common problem: library(duckplyr) # Use `as_duckplyr_df()` to enable processing with duckdb: -out <- - palmerpenguins::penguins %>% - as_duckplyr_df() %>% +out <- + palmerpenguins::penguins %>% + as_duckplyr_df() %>% transmute(bill_area = bill_length_mm * bill_depth_mm, bill_length_mm, species, sex) %>% filter(bill_length_mm < 40) %>% select(-bill_length_mm) @@ -40,46 +45,12 @@ This is a basic example which shows you how to solve a common problem: #> [1] "bill_area" "species" "sex" # duckdb is responsible for eventually carrying out the operations: -out %>% +out %>% explain() -#> ┌───────────────────────────┐ -#> │ PROJECTION │ -#> │ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ │ -#> │ bill_area │ -#> │ species │ -#> │ sex │ -#> └─────────────┬─────────────┘ -#> ┌─────────────┴─────────────┐ -#> │ FILTER │ -#> │ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ │ -#> │ (bill_length_mm < 40.0) │ -#> └─────────────┬─────────────┘ -#> ┌─────────────┴─────────────┐ -#> │ R_DATAFRAME_SCAN │ -#> │ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ │ -#> │ data.frame │ -#> │ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ │ -#> │ EC=344 │ -#> └───────────────────────────┘ +#> Can't convert to relational, fallback implementation will be used. 
# The contents of this data frame are computed only upon request: out -#> materializing: -#> --------------------- -#> --- Relation Tree --- -#> --------------------- -#> Projection [bill_area as bill_area, species as species, sex as sex] -#> Filter [<(bill_length_mm, 40.0)] -#> Projection [*(bill_length_mm, bill_depth_mm) as bill_area, bill_length_mm as bill_length_mm, species as species, sex as sex] -#> r_dataframe_scan(0x12c0a8a58) -#> -#> --------------------- -#> -- Result Columns -- -#> --------------------- -#> - bill_area (DOUBLE) -#> - species (species) -#> - sex (sex) -#> #> # A tibble: 100 × 3 #> bill_area species sex #> <dbl> <fct> <fct> @@ -93,7 +64,7 @@ This is a basic example which shows you how to solve a common problem: #> 8 646. Adelie NA #> 9 654. Adelie NA #> 10 818. Adelie male -#> # … with 90 more rows +#> # ℹ 90 more rows # Once computed, the results remain available as a data frame: out @@ -110,4 +81,160 @@ This is a basic example which shows you how to solve a common problem: #> 8 646. Adelie NA #> 9 654. Adelie NA #> 10 818. Adelie male -#> # … with 90 more rows +#> # ℹ 90 more rows + +## Extensibility + +This package only provides generics, for which other packages may then implement methods. + +
+library(duckplyr)
+
+new_dfrel <- function(x) {
+  stopifnot(is.data.frame(x))
+  new_relational(list(x), class = "dfrel")
+}
+mtcars_rel <- new_dfrel(mtcars[1:5, 1:4])
+
+rel_to_df.dfrel <- function(rel, ...) {
+  unclass(rel)[[1]]
+}
+rel_to_df(mtcars_rel)
+#>                    mpg cyl disp  hp
+#> Mazda RX4         21.0   6  160 110
+#> Mazda RX4 Wag     21.0   6  160 110
+#> Datsun 710        22.8   4  108  93
+#> Hornet 4 Drive    21.4   6  258 110
+#> Hornet Sportabout 18.7   8  360 175
+
+rel_filter.dfrel <- function(rel, exprs, ...) {
+  df <- unclass(rel)[[1]]
+
+  # A real implementation would evaluate the predicates defined
+  # by the exprs argument
+  new_dfrel(df[sample.int(nrow(df), 3, replace = TRUE), ])
+}
+
+rel_filter(
+  mtcars_rel,
+  list(
+    relexpr_function(
+      "gt",
+      list(relexpr_reference("cyl"), relexpr_constant("6"))
+    )
+  )
+)
+#> [[1]]
+#>                  mpg cyl disp  hp
+#> Mazda RX4 Wag   21.0   6  160 110
+#> Mazda RX4 Wag.1 21.0   6  160 110
+#> Datsun 710      22.8   4  108  93
+#> 
+#> attr(,"class")
+#> [1] "dfrel"      "relational"
+
+rel_project.dfrel <- function(rel, exprs, ...) {
+  df <- unclass(rel)[[1]]
+
+  # A real implementation would evaluate the expressions defined
+  # by the exprs argument
+  new_dfrel(df[seq_len(min(3, ncol(df)))])
+}
+
+rel_project(
+  mtcars_rel,
+  list(relexpr_reference("cyl"), relexpr_reference("disp"))
+)
+#> [[1]]
+#>                    mpg cyl disp
+#> Mazda RX4         21.0   6  160
+#> Mazda RX4 Wag     21.0   6  160
+#> Datsun 710        22.8   4  108
+#> Hornet 4 Drive    21.4   6  258
+#> Hornet Sportabout 18.7   8  360
+#> 
+#> attr(,"class")
+#> [1] "dfrel"      "relational"
+
+rel_order.dfrel <- function(rel, exprs, ...) {
+  df <- unclass(rel)[[1]]
+
+  # A real implementation would evaluate the expressions defined
+  # by the exprs argument
+  new_dfrel(df[order(df[[1]]), ])
+}
+
+rel_order(
+  mtcars_rel,
+  list(relexpr_reference("mpg"))
+)
+#> [[1]]
+#>                    mpg cyl disp  hp
+#> Hornet Sportabout 18.7   8  360 175
+#> Mazda RX4         21.0   6  160 110
+#> Mazda RX4 Wag     21.0   6  160 110
+#> Hornet 4 Drive    21.4   6  258 110
+#> Datsun 710        22.8   4  108  93
+#> 
+#> attr(,"class")
+#> [1] "dfrel"      "relational"
+rel_join.dfrel <- function(left, right, conds, join, ...) {
+  left_df <- unclass(left)[[1]]
+  right_df <- unclass(right)[[1]]
+
+  # A real implementation would evaluate the expressions
+  # defined by the conds argument,
+  # use different join types based on the join argument,
+  # and implement the join itself instead of relaying to left_join().
+  new_dfrel(dplyr::left_join(left_df, right_df))
+}
+
+rel_join(new_dfrel(data.frame(mpg = 21)), mtcars_rel)
+#> Joining with `by = join_by(mpg)`
+#> [[1]]
+#>   mpg cyl disp  hp
+#> 1  21   6  160 110
+#> 2  21   6  160 110
+#> 
+#> attr(,"class")
+#> [1] "dfrel"      "relational"
+
+rel_limit.dfrel <- function(rel, n, ...) {
+  df <- unclass(rel)[[1]]
+
+  new_dfrel(df[seq_len(n), ])
+}
+
+rel_limit(mtcars_rel, 3)
+#> [[1]]
+#>                mpg cyl disp  hp
+#> Mazda RX4     21.0   6  160 110
+#> Mazda RX4 Wag 21.0   6  160 110
+#> Datsun 710    22.8   4  108  93
+#> 
+#> attr(,"class")
+#> [1] "dfrel"      "relational"
+
+rel_distinct.dfrel <- function(rel, ...) {
+  df <- unclass(rel)[[1]]
+
+  new_dfrel(df[!duplicated(df), ])
+}
+
+rel_distinct(new_dfrel(mtcars[1:3, 1:4]))
+#> [[1]]
+#>             mpg cyl disp  hp
+#> Mazda RX4  21.0   6  160 110
+#> Datsun 710 22.8   4  108  93
+#> 
+#> attr(,"class")
+#> [1] "dfrel"      "relational"
+
+rel_names.dfrel <- function(rel, ...) {
+  df <- unclass(rel)[[1]]
+
+  names(df)
+}
+
+rel_names(mtcars_rel)
+#> [1] "mpg"  "cyl"  "disp" "hp"
diff --git a/_pkgdown.yml b/_pkgdown.yml new file mode 100644 index 00000000..e3aeeffc --- /dev/null +++ b/_pkgdown.yml @@ -0,0 +1,4 @@ +url: https://duckdblabs.github.io/duckplyr/ +template: + bootstrap: 5 + diff --git a/cran-comments.md b/cran-comments.md new file mode 100644 index 00000000..abd6a110 --- /dev/null +++ b/cran-comments.md @@ -0,0 +1,3 @@ +duckplyr 0.1.0 + +Initial release. diff --git a/man/duckplyr-package.Rd b/man/duckplyr-package.Rd index 2279505b..9c3107f6 100644 --- a/man/duckplyr-package.Rd +++ b/man/duckplyr-package.Rd @@ -6,7 +6,7 @@ \alias{duckplyr-package} \title{duckplyr: A DuckDB-backed version of dplyr} \description{ -Implements a subset of dplyr using DuckDB. +A drop-in replacement for dplyr, powered by DuckDB for performance. Also defines a set of generics that provide a low-level implementer's interface for the high-level user interface of dplyr. } \seealso{ Useful links: @@ -17,16 +17,16 @@ Useful links: } \author{ -\strong{Maintainer}: Hannes Mühleisen \email{hannes@duckdblabs.com} (\href{https://orcid.org/0000-0001-8552-0029}{ORCID}) +\strong{Maintainer}: Kirill Müller \email{kirill@cynkra.com} (\href{https://orcid.org/0000-0002-1416-3412}{ORCID}) Authors: \itemize{ - \item Kirill Müller (\href{https://orcid.org/0000-0002-1416-3412}{ORCID}) + \item Hannes Mühleisen (\href{https://orcid.org/0000-0001-8552-0029}{ORCID}) } Other contributors: \itemize{ - \item Posit [copyright holder, funder] + \item Posit Software, PBC [copyright holder, funder] } } diff --git a/man/expr.Rd b/man/expr.Rd index 67ff6855..422121d1 100644 --- a/man/expr.Rd +++ b/man/expr.Rd @@ -54,6 +54,45 @@ relexpr_set_alias(expr, alias = NULL) \item{default_expr}{default relational expression.} } +\value{ +an object of class \code{"relational_relexpr"} + +an object of class \code{"relational_relexpr"} + +an object of class \code{"relational_relexpr"} + +an object of class \code{"relational_relexpr"} + +an object of class \code{"relational_relexpr"} +} \description{ 
-TBD. +These functions provide a backend-agnostic way to construct expression trees +built of column references, constants, and functions. +All subexpressions in an expression tree can have an alias. + +\code{new_relexpr()} constructs an object of class \code{"relational_relexpr"}. +It is used by the higher-level constructors; +users should rarely need to call it directly. + +\code{relexpr_reference()} constructs a reference to a column. + +\code{relexpr_constant()} wraps a constant value. + +\code{relexpr_function()} applies a function. +The arguments to this function are a list of other expression objects. + +\code{relexpr_window()} applies a function over a window, +similarly to the SQL \code{OVER} clause. + +\code{relexpr_set_alias()} assigns an alias to an expression. +} +\examples{ +relexpr_set_alias(alias = "my_predicate", + relexpr_function("<", + list( + relexpr_reference("my_number"), + relexpr_constant(42) + ) + ) +) } diff --git a/man/methods_overwrite.Rd b/man/methods_overwrite.Rd deleted file mode 100644 index 51875b46..00000000 --- a/man/methods_overwrite.Rd +++ /dev/null @@ -1,11 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/overwrite.R -\name{methods_overwrite} -\alias{methods_overwrite} -\title{Methods overwriting} -\usage{ -methods_overwrite() -} -\description{ -Methods overwriting -} diff --git a/man/methods_restore.Rd b/man/methods_restore.Rd deleted file mode 100644 index 3f0d65c3..00000000 --- a/man/methods_restore.Rd +++ /dev/null @@ -1,11 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/restore.R -\name{methods_restore} -\alias{methods_restore} -\title{Methods restoring} -\usage{ -methods_restore() -} -\description{ -Methods restoring -} diff --git a/man/new_relational.Rd b/man/new_relational.Rd deleted file mode 100644 index d4184a93..00000000 --- a/man/new_relational.Rd +++ /dev/null @@ -1,16 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please 
edit documentation in R/relational-rel.R -\name{new_relational} -\alias{new_relational} -\title{Relational API} -\usage{ -new_relational(..., class = NULL) -} -\arguments{ -\item{...}{Passed on to \code{\link[=structure]{structure()}}} - -\item{class}{Classes added in front of the \code{"relational"} base class} -} -\description{ -TBD. -} diff --git a/man/rel.Rd b/man/rel.Rd deleted file mode 100644 index 6126c1d7..00000000 --- a/man/rel.Rd +++ /dev/null @@ -1,19 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/relational-rel.R -\name{rel_tostring} -\alias{rel_tostring} -\alias{rel_names} -\title{TBD} -\usage{ -rel_tostring(rel, ...) - -rel_names(rel, ...) -} -\arguments{ -\item{rel}{The relation object.} - -\item{...}{Reserved for future extensions, must be empty.} -} -\description{ -TBD. -} diff --git a/man/rel_aggregate.Rd b/man/rel_aggregate.Rd deleted file mode 100644 index 4324119f..00000000 --- a/man/rel_aggregate.Rd +++ /dev/null @@ -1,28 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/relational-rel.R -\name{rel_aggregate} -\alias{rel_aggregate} -\title{Lazily aggregate a relation object} -\usage{ -rel_aggregate(rel, groups, aggregates, ...) -} -\arguments{ -\item{rel}{The relation object.} - -\item{groups}{a list of DuckDB expressions to group by} - -\item{aggregates}{a (optionally named) list of DuckDB expressions with aggregates to compute} - -\item{...}{Reserved for future extensions, must be empty.} -} -\value{ -the now aggregated relation object -} -\description{ -TBD. 
-} -\examples{ -rel <- rel_from_df(mtcars) -aggrs <- list(avg_hp = relexpr_function("avg", list(relexpr_reference("hp")))) -rel2 <- rel_aggregate(rel, list(relexpr_reference("cyl")), aggrs) -} diff --git a/man/rel_alias.Rd b/man/rel_alias.Rd deleted file mode 100644 index f32abc1c..00000000 --- a/man/rel_alias.Rd +++ /dev/null @@ -1,20 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/relational-rel.R -\name{rel_alias} -\alias{rel_alias} -\title{Get the internal alias for a relation object} -\usage{ -rel_alias(rel, ...) -} -\arguments{ -\item{rel}{The relation object.} - -\item{...}{Reserved for future extensions, must be empty.} -} -\description{ -TBD. -} -\examples{ -rel <- rel_from_df(mtcars) -rel_alias(rel) -} diff --git a/man/rel_distinct.Rd b/man/rel_distinct.Rd deleted file mode 100644 index 1d5cb27c..00000000 --- a/man/rel_distinct.Rd +++ /dev/null @@ -1,23 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/relational-rel.R -\name{rel_distinct} -\alias{rel_distinct} -\title{Lazily compute a distinct result on a relation object} -\usage{ -rel_distinct(rel, ...) -} -\arguments{ -\item{rel}{The relation object.} - -\item{...}{Reserved for future extensions, must be empty.} -} -\value{ -a new relation object with distinct rows -} -\description{ -TBD. -} -\examples{ -rel <- rel_from_df(mtcars) -rel2 <- rel_distinct(rel) -} diff --git a/man/rel_explain.Rd b/man/rel_explain.Rd deleted file mode 100644 index 58f6499c..00000000 --- a/man/rel_explain.Rd +++ /dev/null @@ -1,20 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/relational-rel.R -\name{rel_explain} -\alias{rel_explain} -\title{Print the EXPLAIN output for a relation object} -\usage{ -rel_explain(rel, ...) -} -\arguments{ -\item{rel}{The relation object.} - -\item{...}{Reserved for future extensions, must be empty.} -} -\description{ -TBD. 
-} -\examples{ -rel <- rel_from_df(mtcars) -rel_explain(rel) -} diff --git a/man/rel_filter.Rd b/man/rel_filter.Rd deleted file mode 100644 index 49de0bad..00000000 --- a/man/rel_filter.Rd +++ /dev/null @@ -1,32 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/relational-rel.R -\name{rel_filter} -\alias{rel_filter} -\title{Lazily filter a relation object} -\usage{ -rel_filter(rel, exprs, ...) -} -\arguments{ -\item{rel}{The relation object.} - -\item{exprs}{a list of DuckDB expressions to filter by} - -\item{...}{Reserved for future extensions, must be empty.} -} -\value{ -the now filtered relation object -} -\description{ -TBD. -} -\examples{ -rel <- rel_from_df(mtcars) -rel2 <- rel_filter( - rel, - list( - relexpr_function( - "gt", - list(relexpr_reference("cyl"), relexpr_constant("6"))) - ) -) -} diff --git a/man/rel_join.Rd b/man/rel_join.Rd deleted file mode 100644 index 576e9757..00000000 --- a/man/rel_join.Rd +++ /dev/null @@ -1,47 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/relational-rel.R -\name{rel_join} -\alias{rel_join} -\title{Lazily INNER join two relation objects} -\usage{ -rel_join( - left, - right, - conds, - join = c("inner", "left", "right", "outer", "cross", "semi", "anti"), - join_ref_type = c("regular", "natural", "cross", "positional", "asof"), - ... -) -} -\arguments{ -\item{left}{the left-hand-side relation object} - -\item{right}{the right-hand-side relation object} - -\item{conds}{a list of DuckDB expressions to use for the join} - -\item{join}{type of join} - -\item{join_ref_type}{ref type of join} - -\item{...}{Reserved for future extensions, must be empty.} -} -\value{ -a new relation object resulting from the join -} -\description{ -TBD. 
-} -\examples{ -\dontrun{ -left <- rel_from_df(mtcars) -right <- rel_from_df(mtcars) -cond <- list( - relexpr_function( - "eq", - list(relexpr_reference("cyl", left), relexpr_reference("cyl", right)) - ) -) -rel2 <- rel_join(left, right, cond) -} -} diff --git a/man/rel_limit.Rd b/man/rel_limit.Rd deleted file mode 100644 index 3d63b1e0..00000000 --- a/man/rel_limit.Rd +++ /dev/null @@ -1,18 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/relational-rel.R -\name{rel_limit} -\alias{rel_limit} -\title{Lazily limit the rows in a relation object} -\usage{ -rel_limit(rel, n, ...) -} -\arguments{ -\item{rel}{The relation object.} - -\item{n}{The number of rows.} - -\item{...}{Reserved for future extensions, must be empty.} -} -\description{ -TBD. -} diff --git a/man/rel_order.Rd b/man/rel_order.Rd deleted file mode 100644 index 191abe1f..00000000 --- a/man/rel_order.Rd +++ /dev/null @@ -1,25 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/relational-rel.R -\name{rel_order} -\alias{rel_order} -\title{Lazily reorder a relation object} -\usage{ -rel_order(rel, orders, ...) -} -\arguments{ -\item{rel}{The relation object.} - -\item{orders}{a list of DuckDB expressions to order by} - -\item{...}{Reserved for future extensions, must be empty.} -} -\value{ -the now aggregated relation object -} -\description{ -TBD. -} -\examples{ -rel <- rel_from_df(mtcars) -rel2 <- rel_order(rel, list(relexpr_reference("hp"))) -} diff --git a/man/rel_project.Rd b/man/rel_project.Rd deleted file mode 100644 index bb98002f..00000000 --- a/man/rel_project.Rd +++ /dev/null @@ -1,25 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/relational-rel.R -\name{rel_project} -\alias{rel_project} -\title{Lazily project a relation object} -\usage{ -rel_project(rel, exprs, ...) 
-} -\arguments{ -\item{rel}{The relation object.} - -\item{exprs}{a list of DuckDB expressions to project} - -\item{...}{Reserved for future extensions, must be empty.} -} -\value{ -the now projected relation object -} -\description{ -TBD. -} -\examples{ -rel <- rel_from_df(mtcars) -rel2 <- rel_project(rel, list(relexpr_reference("cyl"), relexpr_reference("disp"))) -} diff --git a/man/rel_set_alias.Rd b/man/rel_set_alias.Rd deleted file mode 100644 index 6d7b1f37..00000000 --- a/man/rel_set_alias.Rd +++ /dev/null @@ -1,22 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/relational-rel.R -\name{rel_set_alias} -\alias{rel_set_alias} -\title{Set the internal alias for a relation object} -\usage{ -rel_set_alias(rel, alias, ...) -} -\arguments{ -\item{rel}{The relation object.} - -\item{alias}{the new alias} - -\item{...}{Reserved for future extensions, must be empty.} -} -\description{ -TBD. -} -\examples{ -rel <- rel_from_df(mtcars) -rel_set_alias(rel, "my_new_alias") -} diff --git a/man/rel_set_diff.Rd b/man/rel_set_diff.Rd deleted file mode 100644 index 0bde911e..00000000 --- a/man/rel_set_diff.Rd +++ /dev/null @@ -1,25 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/relational-rel.R -\name{rel_set_diff} -\alias{rel_set_diff} -\title{Lazily compute a set_diff result on a relation object} -\usage{ -rel_set_diff(rel_a, rel_b, ...) -} -\arguments{ -\item{rel_a}{a DuckDB relation object} - -\item{rel_b}{a DuckDB relation object} - -\item{...}{Reserved for future extensions, must be empty.} -} -\value{ -a new relation object with the result -} -\description{ -TBD. 
-} -\examples{ -rel <- rel_from_df(mtcars) -rel2 <- rel_set_diff(rel) -} diff --git a/man/rel_set_intersect.Rd b/man/rel_set_intersect.Rd deleted file mode 100644 index 8f39996a..00000000 --- a/man/rel_set_intersect.Rd +++ /dev/null @@ -1,25 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/relational-rel.R -\name{rel_set_intersect} -\alias{rel_set_intersect} -\title{Lazily compute a set_intersect result on a relation object} -\usage{ -rel_set_intersect(rel_a, rel_b, ...) -} -\arguments{ -\item{rel_a}{a DuckDB relation object} - -\item{rel_b}{a DuckDB relation object} - -\item{...}{Reserved for future extensions, must be empty.} -} -\value{ -a new relation object with the result -} -\description{ -TBD. -} -\examples{ -rel <- rel_from_df(mtcars) -rel2 <- rel_set_intersect(rel) -} diff --git a/man/rel_set_symdiff.Rd b/man/rel_set_symdiff.Rd deleted file mode 100644 index 827d0124..00000000 --- a/man/rel_set_symdiff.Rd +++ /dev/null @@ -1,25 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/relational-rel.R -\name{rel_set_symdiff} -\alias{rel_set_symdiff} -\title{Lazily compute a set_symdiff result on a relation object} -\usage{ -rel_set_symdiff(rel_a, rel_b, ...) -} -\arguments{ -\item{rel_a}{a DuckDB relation object} - -\item{rel_b}{a DuckDB relation object} - -\item{...}{Reserved for future extensions, must be empty.} -} -\value{ -a new relation object with the result -} -\description{ -TBD. -} -\examples{ -rel <- rel_from_df(mtcars) -rel2 <- rel_set_symdiff(rel) -} diff --git a/man/rel_to_df.Rd b/man/rel_to_df.Rd deleted file mode 100644 index 7d2ce8df..00000000 --- a/man/rel_to_df.Rd +++ /dev/null @@ -1,31 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/relational-rel.R -\name{rel_to_df} -\alias{rel_to_df} -\title{Convert a relation object to a data frame} -\usage{ -rel_to_df(rel, ...) 
-} -\arguments{ -\item{rel}{The relation object.} - -\item{...}{Reserved for future extensions, must be empty.} -} -\value{ -A data frame. -} -\description{ -TBD. -} -\examples{ -rel <- rel_from_df(mtcars) -rel2 <- rel_filter( - rel, - list( - relexpr_function( - "gt", - list(relexpr_reference("cyl"), relexpr_constant("6")) - ) - ) - ) -} diff --git a/man/rel_union_all.Rd b/man/rel_union_all.Rd deleted file mode 100644 index c5f3f8c3..00000000 --- a/man/rel_union_all.Rd +++ /dev/null @@ -1,25 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/relational-rel.R -\name{rel_union_all} -\alias{rel_union_all} -\title{Lazily compute a set_union_all result on a relation object} -\usage{ -rel_union_all(rel_a, rel_b, ...) -} -\arguments{ -\item{rel_a}{a DuckDB relation object} - -\item{rel_b}{a DuckDB relation object} - -\item{...}{Reserved for future extensions, must be empty.} -} -\value{ -a new relation object with the result -} -\description{ -TBD. -} -\examples{ -rel <- rel_from_df(mtcars) -rel2 <- rel_union_all(rel) -} diff --git a/man/relational.Rd b/man/relational.Rd new file mode 100644 index 00000000..69edbb53 --- /dev/null +++ b/man/relational.Rd @@ -0,0 +1,233 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/relational-rel.R +\name{relational} +\alias{relational} +\alias{new_relational} +\alias{rel_to_df} +\alias{rel_filter} +\alias{rel_project} +\alias{rel_aggregate} +\alias{rel_order} +\alias{rel_join} +\alias{rel_limit} +\alias{rel_distinct} +\alias{rel_set_intersect} +\alias{rel_set_diff} +\alias{rel_set_symdiff} +\alias{rel_union_all} +\alias{rel_names} +\title{Relational implementer's interface} +\usage{ +new_relational(..., class = NULL) + +rel_to_df(rel, ...) + +rel_filter(rel, exprs, ...) + +rel_project(rel, exprs, ...) + +rel_aggregate(rel, groups, aggregates, ...) + +rel_order(rel, orders, ...) 
+ +rel_join( + left, + right, + conds, + join = c("inner", "left", "right", "outer", "cross", "semi", "anti"), + ... +) + +rel_limit(rel, n, ...) + +rel_distinct(rel, ...) + +rel_set_intersect(rel_a, rel_b, ...) + +rel_set_diff(rel_a, rel_b, ...) + +rel_set_symdiff(rel_a, rel_b, ...) + +rel_union_all(rel_a, rel_b, ...) + +rel_names(rel, ...) +} +\arguments{ +\item{...}{Reserved for future extensions, must be empty.} + +\item{class}{Classes added in front of the \code{"relational"} base class.} + +\item{rel, rel_a, rel_b, left, right}{A relational object.} + +\item{exprs}{A list of \link{expr} objects to filter by or to project.} + +\item{groups}{A list of expressions to group by.} + +\item{aggregates}{A list of expressions with aggregates to compute.} + +\item{orders}{A list of expressions to order by.} + +\item{conds}{A list of expressions to use for the join.} + +\item{join}{The type of join.} + +\item{n}{The number of rows.} +} +\value{ +\itemize{ +\item \code{new_relational()} returns a new relational object. +\item \code{rel_to_df()} returns a data frame. +\item \code{rel_names()} returns a character vector. +\item All other generics return a modified relational object. +} +} +\description{ +The constructor and generics described here define a class +that helps separate dplyr's user interface from the actual underlying operations. +In the longer term, this will help packages that implement the dplyr interface +(such as \pkg{dbplyr}, \pkg{dtplyr}, \pkg{arrow} and similar) +to focus on the core details of their functionality, +rather than on the intricacies of dplyr's user interface. + +\code{new_relational()} constructs an object of class \code{"relational"}. +Users are encouraged to provide the \code{class} argument. +The typical use case will be to create a wrapper function. + +\code{rel_to_df()} extracts a data frame representation from a relational object, +to be used by \code{\link[dplyr:compute]{dplyr::collect()}}. 
+ +\code{rel_filter()} keeps rows that match a predicate, +to be used by \code{\link[dplyr:filter]{dplyr::filter()}}. + +\code{rel_project()} selects columns or creates new columns, +to be used by \code{\link[dplyr:select]{dplyr::select()}}, \code{\link[dplyr:rename]{dplyr::rename()}}, +\code{\link[dplyr:mutate]{dplyr::mutate()}}, \code{\link[dplyr:relocate]{dplyr::relocate()}}, and others. + +\code{rel_aggregate()} combines several rows into one, +to be used by \code{\link[dplyr:summarise]{dplyr::summarize()}}. + +\code{rel_order()} reorders rows by columns or expressions, +to be used by \code{\link[dplyr:arrange]{dplyr::arrange()}}. + +\code{rel_join()} joins or merges two tables, +to be used by \code{\link[dplyr:mutate-joins]{dplyr::left_join()}}, \code{\link[dplyr:mutate-joins]{dplyr::right_join()}}, +\code{\link[dplyr:mutate-joins]{dplyr::inner_join()}}, \code{\link[dplyr:mutate-joins]{dplyr::full_join()}}, \code{\link[dplyr:cross_join]{dplyr::cross_join()}}, +\code{\link[dplyr:filter-joins]{dplyr::semi_join()}}, and \code{\link[dplyr:filter-joins]{dplyr::anti_join()}}. + +\code{rel_limit()} limits the number of rows in a table, +to be used by \code{\link[utils:head]{utils::head()}}. + +\code{rel_distinct()} only keeps the distinct rows in a table, +to be used by \code{\link[dplyr:distinct]{dplyr::distinct()}}. + +\code{rel_set_intersect()} returns rows present in both tables, +to be used by \code{\link[=intersect]{intersect()}}. + +\code{rel_set_diff()} returns rows present in the first table but not in the second, +to be used by \code{\link[=setdiff]{setdiff()}}. + +\code{rel_set_symdiff()} returns rows present in exactly one of the two tables, +to be used by \code{\link[dplyr:setops]{dplyr::symdiff()}}. + +\code{rel_union_all()} returns all rows from both tables, including duplicates, +to be used by \code{\link[dplyr:setops]{dplyr::union_all()}}. + +\code{rel_names()} returns the column names as a character vector, +to be used by \code{\link[=colnames]{colnames()}}. 
+} +\examples{ +new_dfrel <- function(x) { + stopifnot(is.data.frame(x)) + new_relational(list(x), class = "dfrel") +} +mtcars_rel <- new_dfrel(mtcars[1:5, 1:4]) + +rel_to_df.dfrel <- function(rel, ...) { + unclass(rel)[[1]] +} +rel_to_df(mtcars_rel) +\dontshow{if ({ set.seed(20230630); TRUE }) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} + +rel_filter.dfrel <- function(rel, exprs, ...) { + df <- unclass(rel)[[1]] + + # A real implementation would evaluate the predicates defined + # by the exprs argument + new_dfrel(df[sample.int(nrow(df), 3, replace = TRUE), ]) +} + +rel_filter( + mtcars_rel, + list( + relexpr_function( + "gt", + list(relexpr_reference("cyl"), relexpr_constant("6")) + ) + ) +) +\dontshow{\}) # examplesIf} + +rel_project.dfrel <- function(rel, exprs, ...) { + df <- unclass(rel)[[1]] + + # A real implementation would evaluate the expressions defined + # by the exprs argument + new_dfrel(df[seq_len(min(3, ncol(df)))]) +} + +rel_project( + mtcars_rel, + list(relexpr_reference("cyl"), relexpr_reference("disp")) +) + +rel_order.dfrel <- function(rel, exprs, ...) { + df <- unclass(rel)[[1]] + + # A real implementation would evaluate the expressions defined + # by the exprs argument + new_dfrel(df[order(df[[1]]), ]) +} + +rel_order( + mtcars_rel, + list(relexpr_reference("mpg")) +) +\dontshow{if (requireNamespace("dplyr", quietly = TRUE)) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +rel_join.dfrel <- function(left, right, conds, join, ...) { + left_df <- unclass(left)[[1]] + right_df <- unclass(right)[[1]] + + # A real implementation would evaluate the expressions + # defined by the conds argument, + # use different join types based on the join argument, + # and implement the join itself instead of relaying to left_join(). + new_dfrel(dplyr::left_join(left_df, right_df)) +} + +rel_join(new_dfrel(data.frame(mpg = 21)), mtcars_rel) +\dontshow{\}) # examplesIf} + +rel_limit.dfrel <- function(rel, n, ...) 
{ + df <- unclass(rel)[[1]] + + new_dfrel(df[seq_len(n), ]) +} + +rel_limit(mtcars_rel, 3) + +rel_distinct.dfrel <- function(rel, ...) { + df <- unclass(rel)[[1]] + + new_dfrel(df[!duplicated(df), ]) +} + +rel_distinct(new_dfrel(mtcars[1:3, 1:4])) + +rel_names.dfrel <- function(rel, ...) { + df <- unclass(rel)[[1]] + + names(df) +} + +rel_names(mtcars_rel) +} diff --git a/tests/testthat/_snaps/relational-rel.md b/tests/testthat/_snaps/relational-rel.md new file mode 100644 index 00000000..336fdc89 --- /dev/null +++ b/tests/testthat/_snaps/relational-rel.md @@ -0,0 +1,9 @@ +# new_relational() + + Code + new_relational(list()) + Output + list() + attr(,"class") + [1] "relational" + diff --git a/tests/testthat/test-relational-rel.R b/tests/testthat/test-relational-rel.R new file mode 100644 index 00000000..02f7b95a --- /dev/null +++ b/tests/testthat/test-relational-rel.R @@ -0,0 +1,5 @@ +test_that("new_relational()", { + expect_snapshot({ + new_relational(list()) + }) +}) diff --git a/tools/spelling.R b/tools/spelling.R new file mode 100644 index 00000000..b3d5340d --- /dev/null +++ b/tools/spelling.R @@ -0,0 +1,6 @@ +words <- c("dplyr", "implementer", "DuckDB", "symdiff") +saveRDS(words, file = ".aspell/duckplyr.rds", version = 2) + + + + From 450bc0fd9e0b9b400435600ade483624588634f8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kirill=20M=C3=BCller?= Date: Sat, 9 Sep 2023 20:43:53 +0200 Subject: [PATCH 2/8] Move import --- R/duckplyr-package.R | 1 + R/explain.R | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/R/duckplyr-package.R b/R/duckplyr-package.R index b5ea30ff..3179b288 100644 --- a/R/duckplyr-package.R +++ b/R/duckplyr-package.R @@ -28,6 +28,7 @@ #' @importFrom dplyr dplyr_col_modify #' @importFrom dplyr dplyr_reconstruct #' @importFrom dplyr dplyr_row_slice +#' @importFrom dplyr explain #' @importFrom dplyr filter #' @importFrom dplyr first #' @importFrom dplyr full_join diff --git a/R/explain.R b/R/explain.R index 07580c96..4e6239bc 100644 --- 
a/R/explain.R +++ b/R/explain.R @@ -1,4 +1,3 @@ -#' @importFrom dplyr explain #' @export explain.duckplyr_df <- function(x, ...) { rel_try({ From b42226696f04940534d1b801c97182a955a18282 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kirill=20M=C3=BCller?= Date: Sat, 9 Sep 2023 20:49:16 +0200 Subject: [PATCH 3/8] Reexport --- NAMESPACE | 98 ++++++++++++++ R/duckplyr-package.R | 296 +++++++++++++++++++++++++++++++++++++++++++ README.Rmd | 5 +- README.md | 16 ++- man/reexports.Rd | 113 +++++++++++++++++ 5 files changed, 519 insertions(+), 9 deletions(-) create mode 100644 man/reexports.Rd diff --git a/NAMESPACE b/NAMESPACE index 53fd4df3..bb2f072e 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -75,15 +75,69 @@ S3method(transmute,duckplyr_df) S3method(ungroup,duckplyr_df) S3method(union,duckplyr_df) S3method(union_all,duckplyr_df) +export(across) +export(add_count) +export(add_tally) +export(all_equal) +export(anti_join) +export(arrange) export(as_duckplyr_df) +export(auto_copy) +export(c_across) +export(case_when) +export(collapse) +export(collect) +export(compute) +export(count) +export(cross_join) +export(desc) +export(distinct) +export(do) +export(dplyr_col_modify) +export(dplyr_reconstruct) +export(dplyr_row_slice) export(duckdb_from_file) export(duckdb_rel_from_df) export(duckplyr_df_from_file) +export(explain) +export(filter) +export(first) +export(full_join) +export(funs) +export(funs_) +export(group_by) +export(group_data) +export(group_keys) +export(group_map) +export(group_modify) +export(group_rows) +export(group_size) +export(group_vars) +export(group_walk) +export(if_all) +export(if_any) +export(inner_join) +export(intersect) export(is_duckplyr_df) +export(is_grouped_df) +export(join_by) +export(lag) +export(last) +export(lead) +export(left_join) export(methods_overwrite) export(methods_restore) +export(mutate) +export(mutate_all) +export(n) +export(n_distinct) +export(nest_by) +export(nest_join) export(new_relational) export(new_relexpr) +export(nth) +export(pull) 
+export(reframe) export(rel_aggregate) export(rel_alias.duckdb_relation) export(rel_alias.relational_df) @@ -111,7 +165,51 @@ export(relexpr_function) export(relexpr_reference) export(relexpr_set_alias) export(relexpr_window) +export(relocate) +export(rename) +export(rename_all) +export(rename_at) +export(rename_if) +export(rename_with) +export(right_join) +export(row_number) +export(rows_append) +export(rows_delete) +export(rows_insert) +export(rows_patch) +export(rows_update) +export(rows_upsert) +export(rowwise) +export(same_src) +export(sample_frac) +export(sample_n) +export(select) +export(select_all) +export(select_at) +export(select_if) +export(semi_join) +export(setdiff) +export(setequal) +export(slice) +export(slice_head) +export(slice_max) +export(slice_min) +export(slice_sample) +export(slice_tail) export(stats_show) +export(summarise) +export(summarise_all) +export(summarise_at) +export(summarize) +export(symdiff) +export(tally) +export(tbl_vars) +export(transmute) +export(ungroup) +export(union) +export(union_all) +export(vars) +export(with_groups) export(wrap_df) export(wrap_integer) import(rlang) diff --git a/R/duckplyr-package.R b/R/duckplyr-package.R index 3179b288..5db605fb 100644 --- a/R/duckplyr-package.R +++ b/R/duckplyr-package.R @@ -8,104 +8,400 @@ #' @import rlang #' @importFrom collections dict #' @importFrom collections queue +NULL + #' @importFrom dplyr across +#' @export +dplyr::across + #' @importFrom dplyr add_count +#' @export +dplyr::add_count + #' @importFrom dplyr add_tally +#' @export +dplyr::add_tally + #' @importFrom dplyr all_equal +#' @export +dplyr::all_equal + #' @importFrom dplyr anti_join +#' @export +dplyr::anti_join + #' @importFrom dplyr arrange +#' @export +dplyr::arrange + #' @importFrom dplyr auto_copy +#' @export +dplyr::auto_copy + #' @importFrom dplyr c_across +#' @export +dplyr::c_across + #' @importFrom dplyr case_when +#' @export +dplyr::case_when + #' @importFrom dplyr collapse +#' @export +dplyr::collapse + 
#' @importFrom dplyr collect +#' @export +dplyr::collect + #' @importFrom dplyr compute +#' @export +dplyr::compute + #' @importFrom dplyr count +#' @export +dplyr::count + #' @importFrom dplyr cross_join +#' @export +dplyr::cross_join + #' @importFrom dplyr desc +#' @export +dplyr::desc + #' @importFrom dplyr distinct +#' @export +dplyr::distinct + #' @importFrom dplyr do +#' @export +dplyr::do + #' @importFrom dplyr dplyr_col_modify +#' @export +dplyr::dplyr_col_modify + #' @importFrom dplyr dplyr_reconstruct +#' @export +dplyr::dplyr_reconstruct + #' @importFrom dplyr dplyr_row_slice +#' @export +dplyr::dplyr_row_slice + #' @importFrom dplyr explain +#' @export +dplyr::explain + #' @importFrom dplyr filter +#' @export +dplyr::filter + #' @importFrom dplyr first +#' @export +dplyr::first + #' @importFrom dplyr full_join +#' @export +dplyr::full_join + #' @importFrom dplyr funs +#' @export +dplyr::funs + #' @importFrom dplyr funs_ +#' @export +dplyr::funs_ + #' @importFrom dplyr group_by +#' @export +dplyr::group_by + #' @importFrom dplyr group_data +#' @export +dplyr::group_data + #' @importFrom dplyr group_keys +#' @export +dplyr::group_keys + #' @importFrom dplyr group_map +#' @export +dplyr::group_map + #' @importFrom dplyr group_modify +#' @export +dplyr::group_modify + #' @importFrom dplyr group_rows +#' @export +dplyr::group_rows + #' @importFrom dplyr group_size +#' @export +dplyr::group_size + #' @importFrom dplyr group_vars +#' @export +dplyr::group_vars + #' @importFrom dplyr group_walk +#' @export +dplyr::group_walk + #' @importFrom dplyr if_all +#' @export +dplyr::if_all + #' @importFrom dplyr if_any +#' @export +dplyr::if_any + #' @importFrom dplyr inner_join +#' @export +dplyr::inner_join + #' @importFrom dplyr intersect +#' @export +dplyr::intersect + #' @importFrom dplyr is_grouped_df +#' @export +dplyr::is_grouped_df + #' @importFrom dplyr join_by +#' @export +dplyr::join_by + #' @importFrom dplyr lag +#' @export +dplyr::lag + #' @importFrom 
dplyr last +#' @export +dplyr::last + #' @importFrom dplyr lead +#' @export +dplyr::lead + #' @importFrom dplyr left_join +#' @export +dplyr::left_join + #' @importFrom dplyr mutate +#' @export +dplyr::mutate + #' @importFrom dplyr mutate_all +#' @export +dplyr::mutate_all + #' @importFrom dplyr n +#' @export +dplyr::n + #' @importFrom dplyr n_distinct +#' @export +dplyr::n_distinct + #' @importFrom dplyr nest_by +#' @export +dplyr::nest_by + #' @importFrom dplyr nest_join +#' @export +dplyr::nest_join + #' @importFrom dplyr nth +#' @export +dplyr::nth + #' @importFrom dplyr pull +#' @export +dplyr::pull + #' @importFrom dplyr reframe +#' @export +dplyr::reframe + #' @importFrom dplyr relocate +#' @export +dplyr::relocate + #' @importFrom dplyr rename +#' @export +dplyr::rename + #' @importFrom dplyr rename_all +#' @export +dplyr::rename_all + #' @importFrom dplyr rename_at +#' @export +dplyr::rename_at + #' @importFrom dplyr rename_if +#' @export +dplyr::rename_if + #' @importFrom dplyr rename_with +#' @export +dplyr::rename_with + #' @importFrom dplyr right_join +#' @export +dplyr::right_join + #' @importFrom dplyr row_number +#' @export +dplyr::row_number + #' @importFrom dplyr rows_append +#' @export +dplyr::rows_append + #' @importFrom dplyr rows_delete +#' @export +dplyr::rows_delete + #' @importFrom dplyr rows_insert +#' @export +dplyr::rows_insert + #' @importFrom dplyr rows_patch +#' @export +dplyr::rows_patch + #' @importFrom dplyr rows_update +#' @export +dplyr::rows_update + #' @importFrom dplyr rows_upsert +#' @export +dplyr::rows_upsert + #' @importFrom dplyr rowwise +#' @export +dplyr::rowwise + #' @importFrom dplyr same_src +#' @export +dplyr::same_src + #' @importFrom dplyr sample_frac +#' @export +dplyr::sample_frac + #' @importFrom dplyr sample_n +#' @export +dplyr::sample_n + #' @importFrom dplyr select +#' @export +dplyr::select + #' @importFrom dplyr select_all +#' @export +dplyr::select_all + #' @importFrom dplyr select_at +#' @export 
+dplyr::select_at + #' @importFrom dplyr select_if +#' @export +dplyr::select_if + #' @importFrom dplyr semi_join +#' @export +dplyr::semi_join + #' @importFrom dplyr setdiff +#' @export +dplyr::setdiff + #' @importFrom dplyr setequal +#' @export +dplyr::setequal + #' @importFrom dplyr slice +#' @export +dplyr::slice + #' @importFrom dplyr slice_head +#' @export +dplyr::slice_head + #' @importFrom dplyr slice_max +#' @export +dplyr::slice_max + #' @importFrom dplyr slice_min +#' @export +dplyr::slice_min + #' @importFrom dplyr slice_sample +#' @export +dplyr::slice_sample + #' @importFrom dplyr slice_tail +#' @export +dplyr::slice_tail + #' @importFrom dplyr summarise +#' @export +dplyr::summarise + #' @importFrom dplyr summarise_all +#' @export +dplyr::summarise_all + #' @importFrom dplyr summarise_at +#' @export +dplyr::summarise_at + #' @importFrom dplyr summarize +#' @export +dplyr::summarize + #' @importFrom dplyr symdiff +#' @export +dplyr::symdiff + #' @importFrom dplyr tally +#' @export +dplyr::tally + #' @importFrom dplyr tbl_vars +#' @export +dplyr::tbl_vars + #' @importFrom dplyr transmute +#' @export +dplyr::transmute + #' @importFrom dplyr ungroup +#' @export +dplyr::ungroup + #' @importFrom dplyr union +#' @export +dplyr::union + #' @importFrom dplyr union_all +#' @export +dplyr::union_all + #' @importFrom dplyr vars +#' @export +dplyr::vars + #' @importFrom dplyr with_groups +#' @export +dplyr::with_groups + #' @importFrom glue glue #' @importFrom lifecycle deprecated #' @importFrom purrr imap diff --git a/README.Rmd b/README.Rmd index 846c532f..a7e08de4 100644 --- a/README.Rmd +++ b/README.Rmd @@ -45,8 +45,9 @@ pak::pak("duckdblabs/duckplyr") This is a basic example which shows you how to solve a common problem: ```{r example} -library(duckdb) +library(conflicted) library(duckplyr) +conflict_prefer("filter", "duckplyr") # Use `as_duckplyr_df()` to enable processing with duckdb: out <- @@ -73,7 +74,7 @@ out ## Extensibility -This package only 
provides generics, for which other packages may then implement methods. +This package also provides generics, for which other packages may then implement methods. ```{r extensibility} library(duckplyr) diff --git a/README.md b/README.md index 41dec4a7..1b357011 100644 --- a/README.md +++ b/README.md @@ -26,17 +26,19 @@ You can also install the development version of duckplyr from [GitHub](https://g This is a basic example which shows you how to solve a common problem:
-library(duckdb)
-#> Loading required package: DBI
+library(conflicted)
 library(duckplyr)
+conflict_prefer("filter", "duckplyr")
+#> [conflicted] Will prefer duckplyr::filter over
+#> any other package.
 
 # Use `as_duckplyr_df()` to enable processing with duckdb:
 out <-
   palmerpenguins::penguins %>%
   as_duckplyr_df() %>%
-  transmute(bill_area = bill_length_mm * bill_depth_mm, bill_length_mm, species, sex) %>%
-  filter(bill_length_mm < 40) %>%
-  select(-bill_length_mm)
+  transmute(bill_area = bill_length_mm * bill_depth_mm, bill_length_mm, species, sex) %>%
+  filter(bill_length_mm < 40) %>%
+  select(-bill_length_mm)
 
 # The result is a data frame or tibble, with its own class.
 class(out)
@@ -46,7 +48,7 @@ This is a basic example which shows you how to solve a common problem:
 
 # duckdb is responsible for eventually carrying out the operations:
 out %>%
-  explain()
+  explain()
 #> Can't convert to relational, fallback implementation will be used.
 
 # The contents of this data frame are computed only upon request:
@@ -85,7 +87,7 @@ This is a basic example which shows you how to solve a common problem:
 
 ## Extensibility
 
-This package only provides generics, for which other packages may then implement methods.
+This package also provides generics, for which other packages may then implement methods.
 
 
 library(duckplyr)
diff --git a/man/reexports.Rd b/man/reexports.Rd
new file mode 100644
index 00000000..53f5cc0e
--- /dev/null
+++ b/man/reexports.Rd
@@ -0,0 +1,113 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/duckplyr-package.R
+\docType{import}
+\name{reexports}
+\alias{reexports}
+\alias{across}
+\alias{add_count}
+\alias{add_tally}
+\alias{all_equal}
+\alias{anti_join}
+\alias{arrange}
+\alias{auto_copy}
+\alias{c_across}
+\alias{case_when}
+\alias{collapse}
+\alias{collect}
+\alias{compute}
+\alias{count}
+\alias{cross_join}
+\alias{desc}
+\alias{distinct}
+\alias{do}
+\alias{dplyr_col_modify}
+\alias{dplyr_reconstruct}
+\alias{dplyr_row_slice}
+\alias{explain}
+\alias{filter}
+\alias{first}
+\alias{full_join}
+\alias{funs}
+\alias{funs_}
+\alias{group_by}
+\alias{group_data}
+\alias{group_keys}
+\alias{group_map}
+\alias{group_modify}
+\alias{group_rows}
+\alias{group_size}
+\alias{group_vars}
+\alias{group_walk}
+\alias{if_all}
+\alias{if_any}
+\alias{inner_join}
+\alias{intersect}
+\alias{is_grouped_df}
+\alias{join_by}
+\alias{lag}
+\alias{last}
+\alias{lead}
+\alias{left_join}
+\alias{mutate}
+\alias{mutate_all}
+\alias{n}
+\alias{n_distinct}
+\alias{nest_by}
+\alias{nest_join}
+\alias{nth}
+\alias{pull}
+\alias{reframe}
+\alias{relocate}
+\alias{rename}
+\alias{rename_all}
+\alias{rename_at}
+\alias{rename_if}
+\alias{rename_with}
+\alias{right_join}
+\alias{row_number}
+\alias{rows_append}
+\alias{rows_delete}
+\alias{rows_insert}
+\alias{rows_patch}
+\alias{rows_update}
+\alias{rows_upsert}
+\alias{rowwise}
+\alias{same_src}
+\alias{sample_frac}
+\alias{sample_n}
+\alias{select}
+\alias{select_all}
+\alias{select_at}
+\alias{select_if}
+\alias{semi_join}
+\alias{setdiff}
+\alias{setequal}
+\alias{slice}
+\alias{slice_head}
+\alias{slice_max}
+\alias{slice_min}
+\alias{slice_sample}
+\alias{slice_tail}
+\alias{summarise}
+\alias{summarise_all}
+\alias{summarise_at}
+\alias{summarize}
+\alias{symdiff}
+\alias{tally}
+\alias{tbl_vars}
+\alias{transmute}
+\alias{ungroup}
+\alias{union}
+\alias{union_all}
+\alias{vars}
+\alias{with_groups}
+\title{Objects exported from other packages}
+\keyword{internal}
+\description{
+These objects are imported from other packages. Follow the links
+below to see their documentation.
+
+\describe{
+  \item{dplyr}{\code{\link[dplyr]{across}}, \code{\link[dplyr:count]{add_count}}, \code{\link[dplyr:count]{add_tally}}, \code{\link[dplyr]{all_equal}}, \code{\link[dplyr:filter-joins]{anti_join}}, \code{\link[dplyr]{arrange}}, \code{\link[dplyr]{auto_copy}}, \code{\link[dplyr]{c_across}}, \code{\link[dplyr]{case_when}}, \code{\link[dplyr:compute]{collapse}}, \code{\link[dplyr:compute]{collect}}, \code{\link[dplyr]{compute}}, \code{\link[dplyr]{count}}, \code{\link[dplyr]{cross_join}}, \code{\link[dplyr]{desc}}, \code{\link[dplyr]{distinct}}, \code{\link[dplyr]{do}}, \code{\link[dplyr:dplyr_extending]{dplyr_col_modify}}, \code{\link[dplyr:dplyr_extending]{dplyr_reconstruct}}, \code{\link[dplyr:dplyr_extending]{dplyr_row_slice}}, \code{\link[dplyr]{explain}}, \code{\link[dplyr]{filter}}, \code{\link[dplyr:nth]{first}}, \code{\link[dplyr:mutate-joins]{full_join}}, \code{\link[dplyr]{funs}}, \code{\link[dplyr:se-deprecated]{funs_}}, \code{\link[dplyr]{group_by}}, \code{\link[dplyr]{group_data}}, \code{\link[dplyr:group_data]{group_keys}}, \code{\link[dplyr]{group_map}}, \code{\link[dplyr:group_map]{group_modify}}, \code{\link[dplyr:group_data]{group_rows}}, \code{\link[dplyr:group_data]{group_size}}, \code{\link[dplyr:group_data]{group_vars}}, \code{\link[dplyr:group_map]{group_walk}}, \code{\link[dplyr:across]{if_all}}, \code{\link[dplyr:across]{if_any}}, \code{\link[dplyr:mutate-joins]{inner_join}}, \code{\link[dplyr:setops]{intersect}}, \code{\link[dplyr:grouped_df]{is_grouped_df}}, \code{\link[dplyr]{join_by}}, \code{\link[dplyr:lead-lag]{lag}}, \code{\link[dplyr:nth]{last}}, \code{\link[dplyr:lead-lag]{lead}}, \code{\link[dplyr:mutate-joins]{left_join}}, \code{\link[dplyr]{mutate}}, \code{\link[dplyr]{mutate_all}}, \code{\link[dplyr:context]{n}}, \code{\link[dplyr]{n_distinct}}, \code{\link[dplyr]{nest_by}}, \code{\link[dplyr]{nest_join}}, \code{\link[dplyr]{nth}}, \code{\link[dplyr]{pull}}, \code{\link[dplyr]{reframe}}, \code{\link[dplyr]{relocate}}, 
\code{\link[dplyr]{rename}}, \code{\link[dplyr:select_all]{rename_all}}, \code{\link[dplyr:select_all]{rename_at}}, \code{\link[dplyr:select_all]{rename_if}}, \code{\link[dplyr:rename]{rename_with}}, \code{\link[dplyr:mutate-joins]{right_join}}, \code{\link[dplyr]{row_number}}, \code{\link[dplyr:rows]{rows_append}}, \code{\link[dplyr:rows]{rows_delete}}, \code{\link[dplyr:rows]{rows_insert}}, \code{\link[dplyr:rows]{rows_patch}}, \code{\link[dplyr:rows]{rows_update}}, \code{\link[dplyr:rows]{rows_upsert}}, \code{\link[dplyr]{rowwise}}, \code{\link[dplyr]{same_src}}, \code{\link[dplyr:sample_n]{sample_frac}}, \code{\link[dplyr]{sample_n}}, \code{\link[dplyr]{select}}, \code{\link[dplyr]{select_all}}, \code{\link[dplyr:select_all]{select_at}}, \code{\link[dplyr:select_all]{select_if}}, \code{\link[dplyr:filter-joins]{semi_join}}, \code{\link[dplyr:setops]{setdiff}}, \code{\link[dplyr:setops]{setequal}}, \code{\link[dplyr]{slice}}, \code{\link[dplyr:slice]{slice_head}}, \code{\link[dplyr:slice]{slice_max}}, \code{\link[dplyr:slice]{slice_min}}, \code{\link[dplyr:slice]{slice_sample}}, \code{\link[dplyr:slice]{slice_tail}}, \code{\link[dplyr]{summarise}}, \code{\link[dplyr]{summarise_all}}, \code{\link[dplyr:summarise_all]{summarise_at}}, \code{\link[dplyr:summarise]{summarize}}, \code{\link[dplyr:setops]{symdiff}}, \code{\link[dplyr:count]{tally}}, \code{\link[dplyr]{tbl_vars}}, \code{\link[dplyr]{transmute}}, \code{\link[dplyr:group_by]{ungroup}}, \code{\link[dplyr:setops]{union}}, \code{\link[dplyr:setops]{union_all}}, \code{\link[dplyr]{vars}}, \code{\link[dplyr]{with_groups}}}
+}}
+

From 6524a02965426e3fd3d417ca6bd92d690159e52d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Kirill=20M=C3=BCller?= 
Date: Sat, 9 Sep 2023 20:50:23 +0200
Subject: [PATCH 4/8] Experimental

---
 README.Rmd | 1 +
 README.md  | 2 ++
 2 files changed, 3 insertions(+)

diff --git a/README.Rmd b/README.Rmd
index a7e08de4..0c730dd8 100644
--- a/README.Rmd
+++ b/README.Rmd
@@ -20,6 +20,7 @@ set.seed(20230702)
 # duckplyr
 
 
+[![Lifecycle: experimental](https://img.shields.io/badge/lifecycle-experimental-orange.svg)](https://lifecycle.r-lib.org/articles/stages.html#experimental)
 
 
 The goal of duckplyr is to provide a drop-in replacement for dplyr that uses DuckDB as a backend for fast operation.
diff --git a/README.md b/README.md
index 1b357011..a8f3909a 100644
--- a/README.md
+++ b/README.md
@@ -4,6 +4,8 @@
 
 
 
+[![Lifecycle: experimental](https://img.shields.io/badge/lifecycle-experimental-orange.svg)](https://lifecycle.r-lib.org/articles/stages.html#experimental)
+
 
 
 The goal of duckplyr is to provide a drop-in replacement for dplyr that uses DuckDB as a backend for fast operation. It also defines a set of generics that provide a low-level implementer’s interface for dplyr’s high-level user interface.

From fbe2fe3ccf8995ceba327e1fe95904560b0a0e29 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Kirill=20M=C3=BCller?= 
Date: Sun, 10 Sep 2023 07:19:11 +0200
Subject: [PATCH 5/8] Restore accidentally deleted methods

---
 NAMESPACE          | 15 +++++++++------
 R/relational-rel.R | 46 ++++++++++++++++++++++++++++++++++++++++++++++
 man/relational.Rd  | 28 ++++++++++++++++++++++++++++
 3 files changed, 83 insertions(+), 6 deletions(-)

diff --git a/NAMESPACE b/NAMESPACE
index bb2f072e..4bbc24bd 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -26,8 +26,12 @@ S3method(pull,duckplyr_df)
 S3method(reframe,duckplyr_df)
 S3method(rel_aggregate,duckdb_relation)
 S3method(rel_aggregate,relational_df)
+S3method(rel_alias,duckdb_relation)
+S3method(rel_alias,relational_df)
 S3method(rel_distinct,duckdb_relation)
 S3method(rel_distinct,relational_df)
+S3method(rel_explain,duckdb_relation)
+S3method(rel_explain,relational_df)
 S3method(rel_filter,duckdb_relation)
 S3method(rel_filter,relational_df)
 S3method(rel_join,duckdb_relation)
@@ -40,6 +44,8 @@ S3method(rel_order,duckdb_relation)
 S3method(rel_order,relational_df)
 S3method(rel_project,duckdb_relation)
 S3method(rel_project,relational_df)
+S3method(rel_set_alias,duckdb_relation)
+S3method(rel_set_alias,relational_df)
 S3method(rel_set_diff,duckdb_relation)
 S3method(rel_set_diff,relational_df)
 S3method(rel_set_intersect,duckdb_relation)
@@ -139,11 +145,9 @@ export(nth)
 export(pull)
 export(reframe)
 export(rel_aggregate)
-export(rel_alias.duckdb_relation)
-export(rel_alias.relational_df)
+export(rel_alias)
 export(rel_distinct)
-export(rel_explain.duckdb_relation)
-export(rel_explain.relational_df)
+export(rel_explain)
 export(rel_filter)
 export(rel_from_df)
 export(rel_join)
@@ -151,8 +155,7 @@ export(rel_limit)
 export(rel_names)
 export(rel_order)
 export(rel_project)
-export(rel_set_alias.duckdb_relation)
-export(rel_set_alias.relational_df)
+export(rel_set_alias)
 export(rel_set_diff)
 export(rel_set_intersect)
 export(rel_set_symdiff)
diff --git a/R/relational-rel.R b/R/relational-rel.R
index fd3b8b14..e42bb060 100644
--- a/R/relational-rel.R
+++ b/R/relational-rel.R
@@ -171,6 +171,7 @@ rel_order <- function(rel, orders, ...) {
 #'
 #' @param conds A list of expressions to use for the join.
 #' @param join The type of join.
+#' @param join_ref_type The ref type of join.
 #' @rdname relational
 #' @export
 #' @examplesIf requireNamespace("dplyr", quietly = TRUE)
@@ -190,6 +191,7 @@ rel_join <- function(left,
                      right,
                      conds,
                      join = c("inner", "left", "right", "outer", "cross", "semi", "anti"),
+                     join_ref_type = c("regular", "natural", "cross", "positional", "asof"),
                      ...) {
   rel_stats_env$rel_join <- (rel_stats_env$rel_join %||% 0L) + 1L
   UseMethod("rel_join")
@@ -286,6 +288,50 @@ rel_union_all <- function(rel_a, rel_b, ...) {
   UseMethod("rel_union_all")
 }
 
+#' rel_explain
+#'
+#' `rel_explain()` prints an explanation of the plan
+#' executed by the relational object.
+#'
+#' @rdname relational
+#' @export
+#' @examples
+#'
+#' rel <- rel_from_df(mtcars)
+#' rel_explain(rel)
+rel_explain <- function(rel, ...) {
+  rel_stats_env$rel_explain <- (rel_stats_env$rel_explain %||% 0L) + 1L
+  UseMethod("rel_explain")
+}
+
+#' rel_alias
+#'
+#' `rel_alias()` returns the alias name for a relational object.
+#'
+#' @rdname relational
+#' @export
+rel_alias <- function(rel, ...) {
+  rel_stats_env$rel_alias <- (rel_stats_env$rel_alias %||% 0L) + 1L
+  UseMethod("rel_alias")
+}
+
+#' rel_set_alias
+#'
+#' `rel_set_alias()` sets the alias name for a relational object.
+#'
+#' @rdname relational
+#' @param alias the new alias
+#' @export
+#' @examples
+#'
+#' rel <- rel_from_df(mtcars)
+#' rel_set_alias(rel, "my_new_alias")
+#' rel_alias(rel)
+rel_set_alias <- function(rel, alias, ...) {
+  rel_stats_env$rel_set_alias <- (rel_stats_env$rel_set_alias %||% 0L) + 1L
+  UseMethod("rel_set_alias")
+}
+
 #' rel_names()
 #'
 #' `rel_names()` returns the column names as character vector,
diff --git a/man/relational.Rd b/man/relational.Rd
index 69edbb53..2b0f141f 100644
--- a/man/relational.Rd
+++ b/man/relational.Rd
@@ -15,6 +15,9 @@
 \alias{rel_set_diff}
 \alias{rel_set_symdiff}
 \alias{rel_union_all}
+\alias{rel_explain}
+\alias{rel_alias}
+\alias{rel_set_alias}
 \alias{rel_names}
 \title{Relational implementer's interface}
 \usage{
@@ -35,6 +38,7 @@ rel_join(
   right,
   conds,
   join = c("inner", "left", "right", "outer", "cross", "semi", "anti"),
+  join_ref_type = c("regular", "natural", "cross", "positional", "asof"),
   ...
 )
 
@@ -50,6 +54,12 @@ rel_set_symdiff(rel_a, rel_b, ...)
 
 rel_union_all(rel_a, rel_b, ...)
 
+rel_explain(rel, ...)
+
+rel_alias(rel, ...)
+
+rel_set_alias(rel, alias, ...)
+
 rel_names(rel, ...)
 }
 \arguments{
@@ -71,7 +81,11 @@ rel_names(rel, ...)
 
 \item{join}{The type of join.}
 
+\item{join_ref_type}{The ref type of join.}
+
 \item{n}{The number of rows.}
+
+\item{alias}{the new alias}
 }
 \value{
 \itemize{
@@ -132,6 +146,13 @@ to be used by \code{\link[dplyr:setops]{dplyr::symdiff()}}.
 \code{rel_union_all()} returns rows present in any of both tables,
 to be used by \code{\link[dplyr:setops]{dplyr::union_all()}}.
 
+\code{rel_explain()} prints an explanation of the plan
+executed by the relational object.
+
+\code{rel_alias()} returns the alias name for a relational object.
+
+\code{rel_set_alias()} sets the alias name for a relational object.
+
 \code{rel_names()} returns the column names as character vector,
 to be used by \code{\link[=colnames]{colnames()}}.
 }
@@ -223,6 +244,13 @@ rel_distinct.dfrel <- function(rel, ...) {
 
 rel_distinct(new_dfrel(mtcars[1:3, 1:4]))
 
+rel <- rel_from_df(mtcars)
+rel_explain(rel)
+
+rel <- rel_from_df(mtcars)
+rel_set_alias(rel, "my_new_alias")
+rel_alias(rel)
+
 rel_names.dfrel <- function(rel, ...) {
   df <- unclass(rel)[[1]]
 

From 1a4d9ddf7ab3c5c6b12ae8af19264992b6bcdbc5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Kirill=20M=C3=BCller?= 
Date: Sun, 10 Sep 2023 07:20:39 +0200
Subject: [PATCH 6/8] Remove dead

---
 NAMESPACE             | 2 --
 R/relational-df.R     | 4 ----
 R/relational-duckdb.R | 4 ----
 3 files changed, 10 deletions(-)

diff --git a/NAMESPACE b/NAMESPACE
index 4bbc24bd..e05dbf6a 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -160,8 +160,6 @@ export(rel_set_diff)
 export(rel_set_intersect)
 export(rel_set_symdiff)
 export(rel_to_df)
-export(rel_tostring.duckdb_relation)
-export(rel_tostring.relational_df)
 export(rel_union_all)
 export(relexpr_constant)
 export(relexpr_function)
diff --git a/R/relational-df.R b/R/relational-df.R
index ef37bf8a..71f5db6f 100644
--- a/R/relational-df.R
+++ b/R/relational-df.R
@@ -62,10 +62,6 @@ rel_union_all.relational_df <- function(rel_a, rel_b, ...) {
 }
 
 
-#' @export
-rel_tostring.relational_df <- function(rel, ...) {
-}
-
 #' @export
 rel_explain.relational_df <- function(rel, ...) {
 }
diff --git a/R/relational-duckdb.R b/R/relational-duckdb.R
index 7717e63e..a76ba30f 100644
--- a/R/relational-duckdb.R
+++ b/R/relational-duckdb.R
@@ -320,10 +320,6 @@ rel_union_all.duckdb_relation <- function(rel_a, rel_b, ...) {
   out
 }
 
-#' @export
-rel_tostring.duckdb_relation <- function(rel, ...) {
-}
-
 #' @export
 rel_explain.duckdb_relation <- function(rel, ...) {
   duckdb$rel_explain(rel)

From 8aed1d0ebddd6d5d92f9fefdbbb33ecd614cc79a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Kirill=20M=C3=BCller?= 
Date: Sun, 10 Sep 2023 07:56:26 +0200
Subject: [PATCH 7/8] README

---
 README.Rmd |  97 ++++++++++++++++++------
 README.md  | 213 +++++++++++++++++++++++++++++++++++++++++------------
 2 files changed, 240 insertions(+), 70 deletions(-)

diff --git a/README.Rmd b/README.Rmd
index 0c730dd8..4895c867 100644
--- a/README.Rmd
+++ b/README.Rmd
@@ -26,26 +26,21 @@ set.seed(20230702)
 The goal of duckplyr is to provide a drop-in replacement for dplyr that uses DuckDB as a backend for fast operation.
 It also defines a set of generics that provide a low-level implementer's interface for dplyr's high-level user interface.
 
-## Installation
+## Example
 
-Once on CRAN, you can install duckplyr with:
+There are two ways to use duckplyr.
 
-``` r
-install.packages("duckplyr")
-```
+1. To enable for individual data frames, use `as_duckplyr_df()` as the first step in your pipe.
+1. To enable for the entire session, use `methods_overwrite()`.
 
-You can also install the development version of duckplyr from [GitHub](https://github.com/) with:
+The examples below illustrate both methods.
+See also the companion [demo repository](https://github.com/Tmonster/duckplyr_demo) for a use case with a large dataset.
 
-``` r
-# install.packages("pak", repos = sprintf("https://r-lib.github.io/p/pak/stable/%s/%s/%s", .Platform$pkgType, R.Version()$os, R.Version()$arch))
-pak::pak("duckdblabs/duckplyr")
-```
+### Individual
 
-## Example
+This example illustrates usage of duckplyr for individual data frames.
 
-This is a basic example which shows you how to solve a common problem:
-
-```{r example}
+```{r individual}
 library(conflicted)
 library(duckplyr)
 conflict_prefer("filter", "duckplyr")
@@ -53,26 +48,71 @@ conflict_prefer("filter", "duckplyr")
 # Use `as_duckplyr_df()` to enable processing with duckdb:
 out <-
   palmerpenguins::penguins %>%
+  # CAVEAT: factor columns are not supported yet
+  mutate(across(where(is.factor), as.character)) %>%
   as_duckplyr_df() %>%
-  transmute(bill_area = bill_length_mm * bill_depth_mm, bill_length_mm, species, sex) %>%
-  filter(bill_length_mm < 40) %>%
-  select(-bill_length_mm)
+  mutate(bill_area = bill_length_mm * bill_depth_mm) %>%
+  summarize(.by = c(species, sex), mean_bill_area = mean(bill_area)) %>%
+  filter(species != "Gentoo")
 
 # The result is a data frame or tibble, with its own class.
 class(out)
 names(out)
 
-# duckdb is responsible for eventually carrying out the operations:
+# duckdb is responsible for eventually carrying out the operations.
+# Despite the late filter, the summary is not computed for the Gentoo species.
 out %>%
   explain()
 
-# The contents of this data frame are computed only upon request:
-out
+# All data frame operations are supported.
+# Computation happens upon the first request.
+out$mean_bill_area
 
-# Once computed, the results remain available as a data frame:
+# After the computation has been carried out, the results are available
+# immediately:
 out
 ```
 
+
+### Session-wide
+
+This example illustrates usage of duckplyr for all data frames in the R session.
+
+```{r session}
+library(conflicted)
+library(duckplyr)
+conflict_prefer("filter", "duckplyr")
+
+# Use `methods_overwrite()` to enable processing with duckdb for all data frames:
+methods_overwrite()
+
+# This is the same query as above, without `as_duckplyr_df()`:
+out <-
+  palmerpenguins::penguins %>%
+  # CAVEAT: factor columns are not supported yet
+  mutate(across(where(is.factor), as.character)) %>%
+  mutate(bill_area = bill_length_mm * bill_depth_mm) %>%
+  summarize(.by = c(species, sex), mean_bill_area = mean(bill_area)) %>%
+  filter(species != "Gentoo")
+
+# The result is a plain tibble now:
+class(out)
+
+# Querying the number of rows also starts the computation:
+nrow(out)
+
+# Restart R, or call `methods_restore()` to revert to the default dplyr implementation.
+methods_restore()
+
+# dplyr is active again:
+palmerpenguins::penguins %>%
+  # CAVEAT: factor columns are not supported yet
+  mutate(across(where(is.factor), as.character)) %>%
+  mutate(bill_area = bill_length_mm * bill_depth_mm) %>%
+  summarize(.by = c(species, sex), mean_bill_area = mean(bill_area)) %>%
+  filter(species != "Gentoo")
+```
+
 ## Extensibility
 
 This package also provides generics, for which other packages may then implement methods.
@@ -171,3 +211,18 @@ rel_names.dfrel <- function(rel, ...) {
 
 rel_names(mtcars_rel)
 ```
+
+## Installation
+
+Once on CRAN, you can install duckplyr with:
+
+``` r
+install.packages("duckplyr")
+```
+
+You can also install the development version of duckplyr from [GitHub](https://github.com/) with:
+
+``` r
+# install.packages("pak", repos = sprintf("https://r-lib.github.io/p/pak/stable/%s/%s/%s", .Platform$pkgType, R.Version()$os, R.Version()$arch))
+pak::pak("duckdblabs/duckplyr")
+```
diff --git a/README.md b/README.md
index a8f3909a..497dc865 100644
--- a/README.md
+++ b/README.md
@@ -10,22 +10,18 @@
 
 The goal of duckplyr is to provide a drop-in replacement for dplyr that uses DuckDB as a backend for fast operation. It also defines a set of generics that provide a low-level implementer’s interface for dplyr’s high-level user interface.
 
-## Installation
-
-Once on CRAN, you can install duckplyr with:
+## Example
 
-
-install.packages("duckplyr")
+There are two ways to use duckplyr. -You can also install the development version of duckplyr from [GitHub](https://github.com/) with: +1. To enable for individual data frames, use `as_duckplyr_df()` as the first step in your pipe. +2. To enable for the entire session, use `methods_overwrite()`. -
-# install.packages("pak", repos = sprintf("https://r-lib.github.io/p/pak/stable/%s/%s/%s", .Platform$pkgType, R.Version()$os, R.Version()$arch))
-pak::pak("duckdblabs/duckplyr")
+The examples below illustrate both methods. See also the companion [demo repository](https://github.com/Tmonster/duckplyr_demo) for a use case with a large dataset. -## Example +### Individual -This is a basic example which shows you how to solve a common problem: +This example illustrates usage of duckplyr for individual data frames.
 library(conflicted)
@@ -37,55 +33,161 @@ This is a basic example which shows you how to solve a common problem:
 # Use `as_duckplyr_df()` to enable processing with duckdb:
 out <-
   palmerpenguins::penguins %>%
+  # CAVEAT: factor columns are not supported yet
+  mutate(across(where(is.factor), as.character)) %>%
   as_duckplyr_df() %>%
-  transmute(bill_area = bill_length_mm * bill_depth_mm, bill_length_mm, species, sex) %>%
-  filter(bill_length_mm < 40) %>%
-  select(-bill_length_mm)
+  mutate(bill_area = bill_length_mm * bill_depth_mm) %>%
+  summarize(.by = c(species, sex), mean_bill_area = mean(bill_area)) %>%
+  filter(species != "Gentoo")
 
 # The result is a data frame or tibble, with its own class.
 class(out)
 #> [1] "duckplyr_df" "tbl_df"      "tbl"         "data.frame"
 names(out)
-#> [1] "bill_area" "species"   "sex"
+#> [1] "species"        "sex"            "mean_bill_area"
 
-# duckdb is responsible for eventually carrying out the operations:
+# duckdb is responsible for eventually carrying out the operations.
+# Despite the late filter, the summary is not computed for the Gentoo species.
 out %>%
   explain()
-#> Can't convert to relational, fallback implementation will be used.
+#> ┌───────────────────────────┐
+#> │       HASH_GROUP_BY       │
+#> │   ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─   │
+#> │             #0            │
+#> │             #1            │
+#> │          mean(#2)         │
+#> └─────────────┬─────────────┘                             
+#> ┌─────────────┴─────────────┐
+#> │         PROJECTION        │
+#> │   ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─   │
+#> │          species          │
+#> │            sex            │
+#> │         bill_area         │
+#> └─────────────┬─────────────┘                             
+#> ┌─────────────┴─────────────┐
+#> │         PROJECTION        │
+#> │   ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─   │
+#> │          species          │
+#> │            sex            │
+#> │         bill_area         │
+#> └─────────────┬─────────────┘                             
+#> ┌─────────────┴─────────────┐
+#> │           FILTER          │
+#> │   ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─   │
+#> │   (species != 'Gentoo')   │
+#> │   ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─   │
+#> │           EC: 0           │
+#> └─────────────┬─────────────┘                             
+#> ┌─────────────┴─────────────┐
+#> │     R_DATAFRAME_SCAN      │
+#> │   ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─   │
+#> │         data.frame        │
+#> │   ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─   │
+#> │          species          │
+#> │       bill_length_mm      │
+#> │       bill_depth_mm       │
+#> │            sex            │
+#> │   ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─   │
+#> │           EC: 0           │
+#> └───────────────────────────┘
 
-# The contents of this data frame are computed only upon request:
-out
-#> # A tibble: 100 × 3
-#>    bill_area species sex   
-#>        <dbl> <fct>   <fct> 
-#>  1      731. Adelie  male  
-#>  2      687. Adelie  female
-#>  3      708. Adelie  female
-#>  4      810. Adelie  male  
-#>  5      692. Adelie  female
-#>  6      768. Adelie  male  
-#>  7      617. Adelie  NA    
-#>  8      646. Adelie  NA    
-#>  9      654. Adelie  NA    
-#> 10      818. Adelie  male  
-#> # ℹ 90 more rows
-
-# Once computed, the results remain available as a data frame:
+# All data frame operations are supported.
+# Computation happens upon the first request.
+out$mean_bill_area
+#> materializing:
+#> ---------------------
+#> --- Relation Tree ---
+#> ---------------------
+#> Filter [!=(species, 'Gentoo')]
+#>   Aggregate [species, sex, mean(bill_area)]
+#>     Projection [species as species, island as island, bill_length_mm as bill_length_mm, bill_depth_mm as bill_depth_mm, flipper_length_mm as flipper_length_mm, body_mass_g as body_mass_g, sex as sex, "year" as year, *(bill_length_mm, bill_depth_mm) as bill_area]
+#>       r_dataframe_scan(0x12c262098)
+#> 
+#> ---------------------
+#> -- Result Columns  --
+#> ---------------------
+#> - species (VARCHAR)
+#> - sex (VARCHAR)
+#> - mean_bill_area (DOUBLE)
+#> 
+#> [1] 770.2627 656.8523 694.9360 819.7503 984.2279
+
+# After the computation has been carried out, the results are available
+# immediately:
 out
-#> # A tibble: 100 × 3
-#>    bill_area species sex   
-#>        <dbl> <fct>   <fct> 
-#>  1      731. Adelie  male  
-#>  2      687. Adelie  female
-#>  3      708. Adelie  female
-#>  4      810. Adelie  male  
-#>  5      692. Adelie  female
-#>  6      768. Adelie  male  
-#>  7      617. Adelie  NA    
-#>  8      646. Adelie  NA    
-#>  9      654. Adelie  NA    
-#> 10      818. Adelie  male  
-#> # ℹ 90 more rows
+#> # A tibble: 5 × 3 +#> species sex mean_bill_area +#> <chr> <chr> <dbl> +#> 1 Adelie male 770. +#> 2 Adelie female 657. +#> 3 Adelie NA 695. +#> 4 Chinstrap female 820. +#> 5 Chinstrap male 984.
+ +### Session-wide + +This example illustrates usage of duckplyr for all data frames in the R session. + +
+library(conflicted)
+library(duckplyr)
+conflict_prefer("filter", "duckplyr")
+#> [conflicted] Removing existing preference.
+#> [conflicted] Will prefer duckplyr::filter over any other package.
+
+# Use `methods_overwrite()` to enable processing with duckdb for all data frames:
+methods_overwrite()
+
+# This is the same query as above, without `as_duckplyr_df()`:
+out <-
+  palmerpenguins::penguins %>%
+  # CAVEAT: factor columns are not supported yet
+  mutate(across(where(is.factor), as.character)) %>%
+  mutate(bill_area = bill_length_mm * bill_depth_mm) %>%
+  summarize(.by = c(species, sex), mean_bill_area = mean(bill_area)) %>%
+  filter(species != "Gentoo")
+
+# The result is a plain tibble now:
+class(out)
+#> [1] "tbl_df"     "tbl"        "data.frame"
+
+# Querying the number of rows also starts the computation:
+nrow(out)
+#> materializing:
+#> ---------------------
+#> --- Relation Tree ---
+#> ---------------------
+#> Filter [!=(species, 'Gentoo')]
+#>   Aggregate [species, sex, mean(bill_area)]
+#>     Projection [species as species, island as island, bill_length_mm as bill_length_mm, bill_depth_mm as bill_depth_mm, flipper_length_mm as flipper_length_mm, body_mass_g as body_mass_g, sex as sex, "year" as year, *(bill_length_mm, bill_depth_mm) as bill_area]
+#>       r_dataframe_scan(0x12b737008)
+#> 
+#> ---------------------
+#> -- Result Columns  --
+#> ---------------------
+#> - species (VARCHAR)
+#> - sex (VARCHAR)
+#> - mean_bill_area (DOUBLE)
+#> [1] 5
+
+# Restart R, or call `methods_restore()` to revert to the default dplyr implementation.
+methods_restore()
+
+# dplyr is active again:
+palmerpenguins::penguins %>%
+  # CAVEAT: factor columns are not supported yet
+  mutate(across(where(is.factor), as.character)) %>%
+  mutate(bill_area = bill_length_mm * bill_depth_mm) %>%
+  summarize(.by = c(species, sex), mean_bill_area = mean(bill_area)) %>%
+  filter(species != "Gentoo")
+#> # A tibble: 5 × 3
+#>   species   sex    mean_bill_area
+#>   <chr>     <chr>           <dbl>
+#> 1 Adelie    male             770.
+#> 2 Adelie    female           657.
+#> 3 Adelie    NA                NA 
+#> 4 Chinstrap female           820.
+#> 5 Chinstrap male             984.
## Extensibility @@ -242,3 +344,16 @@ This package also provides generics, for which other packages may then implement rel_names(mtcars_rel) #> [1] "mpg" "cyl" "disp" "hp"
+ +## Installation + +Once on CRAN, you can install duckplyr with: + +
+install.packages("duckplyr")
+ +You can also install the development version of duckplyr from [GitHub](https://github.com/) with: + +
+# install.packages("pak", repos = sprintf("https://r-lib.github.io/p/pak/stable/%s/%s/%s", .Platform$pkgType, R.Version()$os, R.Version()$arch))
+pak::pak("duckdblabs/duckplyr")
From 1d24518125d3a2b64cbdb4616d9ded427f61a840 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kirill=20M=C3=BCller?= Date: Sun, 10 Sep 2023 20:04:16 +0200 Subject: [PATCH 8/8] Document --- R/methods.R | 8 ++++++++ man/methods_overwrite.Rd | 17 +++++++++++++++++ 2 files changed, 25 insertions(+) create mode 100644 R/methods.R create mode 100644 man/methods_overwrite.Rd diff --git a/R/methods.R b/R/methods.R new file mode 100644 index 00000000..df337bf8 --- /dev/null +++ b/R/methods.R @@ -0,0 +1,8 @@ +#' Forward all dplyr methods to duckplyr +#' +#' After calling `methods_overwrite()`, all dplyr methods are redirected to duckplyr +#' for the duration of the session, or until a call to `methods_restore()`. +#' +#' @return Called for their side effects. +#' @aliases methods_restore +"methods_overwrite" diff --git a/man/methods_overwrite.Rd b/man/methods_overwrite.Rd new file mode 100644 index 00000000..861ec002 --- /dev/null +++ b/man/methods_overwrite.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/methods.R +\docType{data} +\name{methods_overwrite} +\alias{methods_overwrite} +\alias{methods_restore} +\title{Forward all dplyr methods to duckplyr} +\format{ +An object of class \code{function} of length 1. +} +\usage{ +methods_overwrite +} +\description{ +After calling \code{methods_overwrite()}, all dplyr methods are redirected to duckplyr for the duration of the session, or until a call to \code{methods_restore()}. +} +\keyword{datasets}