Skip to content

Commit

Permalink
Merge pull request #45 from duckdblabs/b-union-all-oo
Browse files: browse the repository at this point in the history
  • Loading branch information
krlmlr authored Sep 7, 2023
2 parents fbadb40 + 35345bb commit 5314def
Show file tree
Hide file tree
Showing 6 changed files with 778 additions and 133 deletions.
4 changes: 3 additions & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ Imports:
collections,
DBI,
dplyr (>= 1.1.3),
duckdb (>= 0.8.1-3),
duckdb (>= 0.8.1-9000),
glue,
lifecycle,
purrr,
Expand All @@ -46,6 +46,8 @@ Suggests:
styler,
testthat (>= 3.1.5),
withr
Remotes:
duckdb=duckdb/duckdb-r
Config/testthat/edition: 3
Config/testthat/parallel: false
Config/testthat/start-first: as_duckplyr_df, mutate, filter, count-tally
Expand Down
26 changes: 20 additions & 6 deletions R/oo.R
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,15 @@ oo_force <- function() {
return(FALSE)
}

oo_prep <- function(rel, colname = "___row_number", force = oo_force()) {
oo_prep <- function(
rel,
colname = "___row_number",
...,
extra_cols_pre = character(),
extra_cols_post = character(),
force = oo_force()) {
check_dots_empty0(...)

if (!force) {
return(rel)
}
Expand All @@ -22,11 +30,17 @@ oo_prep <- function(rel, colname = "___row_number", force = oo_force()) {
}

proj_exprs <- imap(set_names(names), relexpr_reference, rel = NULL)
proj_exprs <- c(proj_exprs, list(relexpr_window(
relexpr_function("row_number", list()),
partitions = list(),
alias = colname
)))
proj_exprs <- c(
proj_exprs,
if (length(extra_cols_pre)) map(extra_cols_pre, relexpr_constant, val = NA_integer_),
list(relexpr_window(
relexpr_function("row_number", list()),
partitions = list(),
alias = colname
)),
if (length(extra_cols_post)) map(extra_cols_post, relexpr_constant, val = NA_integer_),
NULL
)

rel_project(rel, unname(proj_exprs))
}
Expand Down
7 changes: 7 additions & 0 deletions R/union_all.R
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,14 @@ union_all.duckplyr_df <- function(x, y, ...) {
y_rel <- rel_project(y_rel, exprs)
}

x_rel <- oo_prep(x_rel, "___row_number_x", extra_cols_post = "___row_number_y")
y_rel <- oo_prep(y_rel, "___row_number_y", extra_cols_pre = "___row_number_x")

rel <- rel_union_all(x_rel, y_rel)

# NULLs sort first in duckdb!
rel <- oo_restore(rel, c("___row_number_x", "___row_number_y"))

out <- rel_to_df(rel)
out <- dplyr_reconstruct(out, x)
return(out)
Expand Down
11 changes: 9 additions & 2 deletions patch/union_all.patch
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
diff --git b/R/union_all.R a/R/union_all.R
index ed860f1..0279a26 100644
index ed860f1..727b739 100644
--- b/R/union_all.R
+++ a/R/union_all.R
@@ -2,9 +2,31 @@
@@ -2,9 +2,38 @@
#' @export
union_all.duckplyr_df <- function(x, y, ...) {
# Our implementation
Expand All @@ -29,7 +29,14 @@ index ed860f1..0279a26 100644
+ y_rel <- rel_project(y_rel, exprs)
+ }
+
+ x_rel <- oo_prep(x_rel, "___row_number_x", extra_cols_post = "___row_number_y")
+ y_rel <- oo_prep(y_rel, "___row_number_y", extra_cols_pre = "___row_number_x")
+
+ rel <- rel_union_all(x_rel, y_rel)
+
+ # NULLs sort first in duckdb!
+ rel <- oo_restore(rel, c("___row_number_x", "___row_number_y"))
+
+ out <- rel_to_df(rel)
+ out <- dplyr_reconstruct(out, x)
return(out)
Expand Down
Loading

0 comments on commit 5314def

Please sign in to comment.