From 9703f2de570c0485ca6d82db2724cabcf6af9f04 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kirill=20M=C3=BCller?= Date: Wed, 6 Sep 2023 23:55:02 +0200 Subject: [PATCH 1/3] Add dots --- R/oo.R | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/R/oo.R b/R/oo.R index b70e2fd5..1c3705a3 100644 --- a/R/oo.R +++ b/R/oo.R @@ -10,7 +10,9 @@ oo_force <- function() { return(FALSE) } -oo_prep <- function(rel, colname = "___row_number", force = oo_force()) { +oo_prep <- function(rel, colname = "___row_number", ..., force = oo_force()) { + check_dots_empty0(...) + if (!force) { return(rel) } From 349192fc7d224e3b94eec7ea032d3f8ccf9c12fc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kirill=20M=C3=BCller?= Date: Thu, 7 Sep 2023 00:16:13 +0200 Subject: [PATCH 2/3] Order-stable union_all() --- R/oo.R | 24 +- R/union_all.R | 7 + patch/union_all.patch | 11 +- tests/testthat/test-rel_api.R | 862 +++++++++++++++++++++++++++++----- tools/00-funs.R | 1 + 5 files changed, 773 insertions(+), 132 deletions(-) diff --git a/R/oo.R b/R/oo.R index 1c3705a3..87600a54 100644 --- a/R/oo.R +++ b/R/oo.R @@ -10,7 +10,13 @@ oo_force <- function() { return(FALSE) } -oo_prep <- function(rel, colname = "___row_number", ..., force = oo_force()) { +oo_prep <- function( + rel, + colname = "___row_number", + ..., + extra_cols_pre = character(), + extra_cols_post = character(), + force = oo_force()) { check_dots_empty0(...) if (!force) { @@ -24,11 +30,17 @@ oo_prep <- function(rel, colname = "___row_number", ..., force = oo_force()) { } proj_exprs <- imap(set_names(names), relexpr_reference, rel = NULL) - proj_exprs <- c(proj_exprs, list(relexpr_window( - relexpr_function("row_number", list()), - partitions = list(), - alias = colname - ))) + proj_exprs <- c( + proj_exprs, + if (length(extra_cols_pre)) map(extra_cols_pre, relexpr_constant, val = NA_integer_), + list(relexpr_window( + relexpr_function("row_number", list()), + partitions = list(), + alias = colname + )), + if (length(extra_cols_post)) map(extra_cols_post, relexpr_constant, val = NA_integer_), + NULL + ) rel_project(rel, unname(proj_exprs)) } diff --git a/R/union_all.R b/R/union_all.R index 0279a261..727b7394 100644 --- a/R/union_all.R +++ b/R/union_all.R @@ -24,7 +24,14 @@ union_all.duckplyr_df <- function(x, y, ...) { y_rel <- rel_project(y_rel, exprs) } + x_rel <- oo_prep(x_rel, "___row_number_x", extra_cols_post = "___row_number_y") + y_rel <- oo_prep(y_rel, "___row_number_y", extra_cols_pre = "___row_number_x") + rel <- rel_union_all(x_rel, y_rel) + + # NULLs sort first in duckdb! + rel <- oo_restore(rel, c("___row_number_x", "___row_number_y")) + out <- rel_to_df(rel) out <- dplyr_reconstruct(out, x) return(out) diff --git a/patch/union_all.patch b/patch/union_all.patch index 72c6fb91..1437bfba 100644 --- a/patch/union_all.patch +++ b/patch/union_all.patch @@ -1,8 +1,8 @@ diff --git b/R/union_all.R a/R/union_all.R -index ed860f1..0279a26 100644 +index ed860f1..727b739 100644 --- b/R/union_all.R +++ a/R/union_all.R -@@ -2,9 +2,31 @@ +@@ -2,9 +2,38 @@ #' @export union_all.duckplyr_df <- function(x, y, ...) { # Our implementation @@ -29,7 +29,14 @@ index ed860f1..0279a26 100644 + y_rel <- rel_project(y_rel, exprs) + } + ++ x_rel <- oo_prep(x_rel, "___row_number_x", extra_cols_post = "___row_number_y") ++ y_rel <- oo_prep(y_rel, "___row_number_y", extra_cols_pre = "___row_number_x") ++ + rel <- rel_union_all(x_rel, y_rel) ++ ++ # NULLs sort first in duckdb! ++ rel <- oo_restore(rel, c("___row_number_x", "___row_number_y")) ++ + out <- rel_to_df(rel) + out <- dplyr_reconstruct(out, x) return(out) diff --git a/tests/testthat/test-rel_api.R b/tests/testthat/test-rel_api.R index d9dd1d63..945639cd 100644 --- a/tests/testthat/test-rel_api.R +++ b/tests/testthat/test-rel_api.R @@ -1399,9 +1399,101 @@ test_that("relational union_all(data.frame(a = 1L, b = 3, g = 2L)) %>% distinct( df2 <- data.frame(a = 1L, b = 3, g = 2L) rel2 <- duckdb$rel_from_df(con, df2, experimental = experimental) - rel3 <- duckdb$rel_union_all(rel1, rel2) + rel3 <- duckdb$rel_project( + rel1, + list( + { + tmp_expr <- duckdb$expr_reference("a") + duckdb$expr_set_alias(tmp_expr, "a") + tmp_expr + }, + { + tmp_expr <- duckdb$expr_reference("b") + duckdb$expr_set_alias(tmp_expr, "b") + tmp_expr + }, + { + tmp_expr <- duckdb$expr_reference("g") + duckdb$expr_set_alias(tmp_expr, "g") + tmp_expr + }, + { + tmp_expr <- duckdb$expr_window(duckdb$expr_function("row_number", list()), list(), list(), offset_expr = NULL, default_expr = NULL) + duckdb$expr_set_alias(tmp_expr, "___row_number_x") + tmp_expr + }, + { + tmp_expr <- if ("experimental" %in% names(formals(duckdb$expr_constant))) { + duckdb$expr_constant(NA_integer_, experimental = experimental) + } else { + duckdb$expr_constant(NA_integer_) + } + duckdb$expr_set_alias(tmp_expr, "___row_number_y") + tmp_expr + } + ) + ) rel4 <- duckdb$rel_project( - rel3, + rel2, + list( + { + tmp_expr <- duckdb$expr_reference("a") + duckdb$expr_set_alias(tmp_expr, "a") + tmp_expr + }, + { + tmp_expr <- duckdb$expr_reference("b") + duckdb$expr_set_alias(tmp_expr, "b") + tmp_expr + }, + { + tmp_expr <- duckdb$expr_reference("g") + duckdb$expr_set_alias(tmp_expr, "g") + tmp_expr + }, + { + tmp_expr <- if ("experimental" %in% names(formals(duckdb$expr_constant))) { + duckdb$expr_constant(NA_integer_, experimental = experimental) + } else { + duckdb$expr_constant(NA_integer_) + } + duckdb$expr_set_alias(tmp_expr, "___row_number_x") + tmp_expr + }, + { + tmp_expr <- duckdb$expr_window(duckdb$expr_function("row_number", list()), list(), list(), offset_expr = NULL, default_expr = NULL) + duckdb$expr_set_alias(tmp_expr, "___row_number_y") + tmp_expr + } + ) + ) + rel5 <- duckdb$rel_union_all(rel3, rel4) + rel6 <- duckdb$rel_order( + rel5, + list(duckdb$expr_reference("___row_number_x"), duckdb$expr_reference("___row_number_y")) + ) + rel7 <- duckdb$rel_project( + rel6, + list( + { + tmp_expr <- duckdb$expr_reference("a") + duckdb$expr_set_alias(tmp_expr, "a") + tmp_expr + }, + { + tmp_expr <- duckdb$expr_reference("b") + duckdb$expr_set_alias(tmp_expr, "b") + tmp_expr + }, + { + tmp_expr <- duckdb$expr_reference("g") + duckdb$expr_set_alias(tmp_expr, "g") + tmp_expr + } + ) + ) + rel8 <- duckdb$rel_project( + rel7, list( { tmp_expr <- duckdb$expr_reference("a") @@ -1425,8 +1517,8 @@ test_that("relational union_all(data.frame(a = 1L, b = 3, g = 2L)) %>% distinct( } ) ) - rel5 <- duckdb$rel_project( - rel4, + rel9 <- duckdb$rel_project( + rel8, list( { tmp_expr <- duckdb$expr_reference("g") @@ -1453,8 +1545,8 @@ test_that("relational union_all(data.frame(a = 1L, b = 3, g = 2L)) %>% distinct( } ) ) - rel6 <- duckdb$rel_filter( - rel5, + rel10 <- duckdb$rel_filter( + rel9, list( duckdb$expr_function( "==", @@ -1469,9 +1561,9 @@ test_that("relational union_all(data.frame(a = 1L, b = 3, g = 2L)) %>% distinct( ) ) ) - rel7 <- duckdb$rel_order(rel6, list(duckdb$expr_reference("___row_number"))) - rel8 <- duckdb$rel_project( - rel7, + rel11 <- duckdb$rel_order(rel10, list(duckdb$expr_reference("___row_number"))) + rel12 <- duckdb$rel_project( + rel11, list( { tmp_expr <- duckdb$expr_reference("g") @@ -1480,8 +1572,8 @@ test_that("relational union_all(data.frame(a = 1L, b = 3, g = 2L)) %>% distinct( } ) ) - rel8 - out <- duckdb$rel_to_altrep(rel8) + rel12 + out <- duckdb$rel_to_altrep(rel12) expect_equal( out, data.frame(g = 1:3) @@ -1500,9 +1592,101 @@ test_that("relational union_all(data.frame(a = 1L, b = 4, g = 2L)) %>% distinct( df2 <- data.frame(a = 1L, b = 4, g = 2L) rel2 <- duckdb$rel_from_df(con, df2, experimental = experimental) - rel3 <- duckdb$rel_union_all(rel1, rel2) + rel3 <- duckdb$rel_project( + rel1, + list( + { + tmp_expr <- duckdb$expr_reference("a") + duckdb$expr_set_alias(tmp_expr, "a") + tmp_expr + }, + { + tmp_expr <- duckdb$expr_reference("b") + duckdb$expr_set_alias(tmp_expr, "b") + tmp_expr + }, + { + tmp_expr <- duckdb$expr_reference("g") + duckdb$expr_set_alias(tmp_expr, "g") + tmp_expr + }, + { + tmp_expr <- duckdb$expr_window(duckdb$expr_function("row_number", list()), list(), list(), offset_expr = NULL, default_expr = NULL) + duckdb$expr_set_alias(tmp_expr, "___row_number_x") + tmp_expr + }, + { + tmp_expr <- if ("experimental" %in% names(formals(duckdb$expr_constant))) { + duckdb$expr_constant(NA_integer_, experimental = experimental) + } else { + duckdb$expr_constant(NA_integer_) + } + duckdb$expr_set_alias(tmp_expr, "___row_number_y") + tmp_expr + } + ) + ) rel4 <- duckdb$rel_project( - rel3, + rel2, + list( + { + tmp_expr <- duckdb$expr_reference("a") + duckdb$expr_set_alias(tmp_expr, "a") + tmp_expr + }, + { + tmp_expr <- duckdb$expr_reference("b") + duckdb$expr_set_alias(tmp_expr, "b") + tmp_expr + }, + { + tmp_expr <- duckdb$expr_reference("g") + duckdb$expr_set_alias(tmp_expr, "g") + tmp_expr + }, + { + tmp_expr <- if ("experimental" %in% names(formals(duckdb$expr_constant))) { + duckdb$expr_constant(NA_integer_, experimental = experimental) + } else { + duckdb$expr_constant(NA_integer_) + } + duckdb$expr_set_alias(tmp_expr, "___row_number_x") + tmp_expr + }, + { + tmp_expr <- duckdb$expr_window(duckdb$expr_function("row_number", list()), list(), list(), offset_expr = NULL, default_expr = NULL) + duckdb$expr_set_alias(tmp_expr, "___row_number_y") + tmp_expr + } + ) + ) + rel5 <- duckdb$rel_union_all(rel3, rel4) + rel6 <- duckdb$rel_order( + rel5, + list(duckdb$expr_reference("___row_number_x"), duckdb$expr_reference("___row_number_y")) + ) + rel7 <- duckdb$rel_project( + rel6, + list( + { + tmp_expr <- duckdb$expr_reference("a") + duckdb$expr_set_alias(tmp_expr, "a") + tmp_expr + }, + { + tmp_expr <- duckdb$expr_reference("b") + duckdb$expr_set_alias(tmp_expr, "b") + tmp_expr + }, + { + tmp_expr <- duckdb$expr_reference("g") + duckdb$expr_set_alias(tmp_expr, "g") + tmp_expr + } + ) + ) + rel8 <- duckdb$rel_project( + rel7, list( { tmp_expr <- duckdb$expr_reference("a") @@ -1526,8 +1710,8 @@ test_that("relational union_all(data.frame(a = 1L, b = 4, g = 2L)) %>% distinct( } ) ) - rel5 <- duckdb$rel_project( - rel4, + rel9 <- duckdb$rel_project( + rel8, list( { tmp_expr <- duckdb$expr_reference("g") @@ -1554,8 +1738,8 @@ test_that("relational union_all(data.frame(a = 1L, b = 4, g = 2L)) %>% distinct( } ) ) - rel6 <- duckdb$rel_filter( - rel5, + rel10 <- duckdb$rel_filter( + rel9, list( duckdb$expr_function( "==", @@ -1570,9 +1754,9 @@ test_that("relational union_all(data.frame(a = 1L, b = 4, g = 2L)) %>% distinct( ) ) ) - rel7 <- duckdb$rel_order(rel6, list(duckdb$expr_reference("___row_number"))) - rel8 <- duckdb$rel_project( - rel7, + rel11 <- duckdb$rel_order(rel10, list(duckdb$expr_reference("___row_number"))) + rel12 <- duckdb$rel_project( + rel11, list( { tmp_expr <- duckdb$expr_reference("g") @@ -1581,8 +1765,8 @@ test_that("relational union_all(data.frame(a = 1L, b = 4, g = 2L)) %>% distinct( } ) ) - rel8 - out <- duckdb$rel_to_altrep(rel8) + rel12 + out <- duckdb$rel_to_altrep(rel12) expect_equal( out, data.frame(g = 1:3) @@ -1601,9 +1785,101 @@ test_that("relational union_all(data.frame(a = 1L, b = 5, g = 2L)) %>% distinct( df2 <- data.frame(a = 1L, b = 5, g = 2L) rel2 <- duckdb$rel_from_df(con, df2, experimental = experimental) - rel3 <- duckdb$rel_union_all(rel1, rel2) + rel3 <- duckdb$rel_project( + rel1, + list( + { + tmp_expr <- duckdb$expr_reference("a") + duckdb$expr_set_alias(tmp_expr, "a") + tmp_expr + }, + { + tmp_expr <- duckdb$expr_reference("b") + duckdb$expr_set_alias(tmp_expr, "b") + tmp_expr + }, + { + tmp_expr <- duckdb$expr_reference("g") + duckdb$expr_set_alias(tmp_expr, "g") + tmp_expr + }, + { + tmp_expr <- duckdb$expr_window(duckdb$expr_function("row_number", list()), list(), list(), offset_expr = NULL, default_expr = NULL) + duckdb$expr_set_alias(tmp_expr, "___row_number_x") + tmp_expr + }, + { + tmp_expr <- if ("experimental" %in% names(formals(duckdb$expr_constant))) { + duckdb$expr_constant(NA_integer_, experimental = experimental) + } else { + duckdb$expr_constant(NA_integer_) + } + duckdb$expr_set_alias(tmp_expr, "___row_number_y") + tmp_expr + } + ) + ) rel4 <- duckdb$rel_project( - rel3, + rel2, + list( + { + tmp_expr <- duckdb$expr_reference("a") + duckdb$expr_set_alias(tmp_expr, "a") + tmp_expr + }, + { + tmp_expr <- duckdb$expr_reference("b") + duckdb$expr_set_alias(tmp_expr, "b") + tmp_expr + }, + { + tmp_expr <- duckdb$expr_reference("g") + duckdb$expr_set_alias(tmp_expr, "g") + tmp_expr + }, + { + tmp_expr <- if ("experimental" %in% names(formals(duckdb$expr_constant))) { + duckdb$expr_constant(NA_integer_, experimental = experimental) + } else { + duckdb$expr_constant(NA_integer_) + } + duckdb$expr_set_alias(tmp_expr, "___row_number_x") + tmp_expr + }, + { + tmp_expr <- duckdb$expr_window(duckdb$expr_function("row_number", list()), list(), list(), offset_expr = NULL, default_expr = NULL) + duckdb$expr_set_alias(tmp_expr, "___row_number_y") + tmp_expr + } + ) + ) + rel5 <- duckdb$rel_union_all(rel3, rel4) + rel6 <- duckdb$rel_order( + rel5, + list(duckdb$expr_reference("___row_number_x"), duckdb$expr_reference("___row_number_y")) + ) + rel7 <- duckdb$rel_project( + rel6, + list( + { + tmp_expr <- duckdb$expr_reference("a") + duckdb$expr_set_alias(tmp_expr, "a") + tmp_expr + }, + { + tmp_expr <- duckdb$expr_reference("b") + duckdb$expr_set_alias(tmp_expr, "b") + tmp_expr + }, + { + tmp_expr <- duckdb$expr_reference("g") + duckdb$expr_set_alias(tmp_expr, "g") + tmp_expr + } + ) + ) + rel8 <- duckdb$rel_project( + rel7, list( { tmp_expr <- duckdb$expr_reference("a") @@ -1627,8 +1903,8 @@ test_that("relational union_all(data.frame(a = 1L, b = 5, g = 2L)) %>% distinct( } ) ) - rel5 <- duckdb$rel_project( - rel4, + rel9 <- duckdb$rel_project( + rel8, list( { tmp_expr <- duckdb$expr_reference("g") @@ -1655,8 +1931,8 @@ test_that("relational union_all(data.frame(a = 1L, b = 5, g = 2L)) %>% distinct( } ) ) - rel6 <- duckdb$rel_filter( - rel5, + rel10 <- duckdb$rel_filter( + rel9, list( duckdb$expr_function( "==", @@ -1671,9 +1947,9 @@ test_that("relational union_all(data.frame(a = 1L, b = 5, g = 2L)) %>% distinct( ) ) ) - rel7 <- duckdb$rel_order(rel6, list(duckdb$expr_reference("___row_number"))) - rel8 <- duckdb$rel_project( - rel7, + rel11 <- duckdb$rel_order(rel10, list(duckdb$expr_reference("___row_number"))) + rel12 <- duckdb$rel_project( + rel11, list( { tmp_expr <- duckdb$expr_reference("g") @@ -1682,8 +1958,8 @@ test_that("relational union_all(data.frame(a = 1L, b = 5, g = 2L)) %>% distinct( } ) ) - rel8 - out <- duckdb$rel_to_altrep(rel8) + rel12 + out <- duckdb$rel_to_altrep(rel12) expect_equal( out, data.frame(g = 1:3) @@ -1702,9 +1978,235 @@ test_that("relational union_all(data.frame(a = 1L, b = 6, g = 2L)) %>% distinct( df2 <- data.frame(a = 1L, b = 6, g = 2L) rel2 <- duckdb$rel_from_df(con, df2, experimental = experimental) - rel3 <- duckdb$rel_union_all(rel1, rel2) + rel3 <- duckdb$rel_project( + rel1, + list( + { + tmp_expr <- duckdb$expr_reference("a") + duckdb$expr_set_alias(tmp_expr, "a") + tmp_expr + }, + { + tmp_expr <- duckdb$expr_reference("b") + duckdb$expr_set_alias(tmp_expr, "b") + tmp_expr + }, + { + tmp_expr <- duckdb$expr_reference("g") + duckdb$expr_set_alias(tmp_expr, "g") + tmp_expr + }, + { + tmp_expr <- duckdb$expr_window(duckdb$expr_function("row_number", list()), list(), list(), offset_expr = NULL, default_expr = NULL) + duckdb$expr_set_alias(tmp_expr, "___row_number_x") + tmp_expr + }, + { + tmp_expr <- if ("experimental" %in% names(formals(duckdb$expr_constant))) { + duckdb$expr_constant(NA_integer_, experimental = experimental) + } else { + duckdb$expr_constant(NA_integer_) + } + duckdb$expr_set_alias(tmp_expr, "___row_number_y") + tmp_expr + } + ) + ) + rel4 <- duckdb$rel_project( + rel2, + list( + { + tmp_expr <- duckdb$expr_reference("a") + duckdb$expr_set_alias(tmp_expr, "a") + tmp_expr + }, + { + tmp_expr <- duckdb$expr_reference("b") + duckdb$expr_set_alias(tmp_expr, "b") + tmp_expr + }, + { + tmp_expr <- duckdb$expr_reference("g") + duckdb$expr_set_alias(tmp_expr, "g") + tmp_expr + }, + { + tmp_expr <- if ("experimental" %in% names(formals(duckdb$expr_constant))) { + duckdb$expr_constant(NA_integer_, experimental = experimental) + } else { + duckdb$expr_constant(NA_integer_) + } + duckdb$expr_set_alias(tmp_expr, "___row_number_x") + tmp_expr + }, + { + tmp_expr <- duckdb$expr_window(duckdb$expr_function("row_number", list()), list(), list(), offset_expr = NULL, default_expr = NULL) + duckdb$expr_set_alias(tmp_expr, "___row_number_y") + tmp_expr + } + ) + ) + rel5 <- duckdb$rel_union_all(rel3, rel4) + rel6 <- duckdb$rel_order( + rel5, + list(duckdb$expr_reference("___row_number_x"), duckdb$expr_reference("___row_number_y")) + ) + rel7 <- duckdb$rel_project( + rel6, + list( + { + tmp_expr <- duckdb$expr_reference("a") + duckdb$expr_set_alias(tmp_expr, "a") + tmp_expr + }, + { + tmp_expr <- duckdb$expr_reference("b") + duckdb$expr_set_alias(tmp_expr, "b") + tmp_expr + }, + { + tmp_expr <- duckdb$expr_reference("g") + duckdb$expr_set_alias(tmp_expr, "g") + tmp_expr + } + ) + ) + rel8 <- duckdb$rel_project( + rel7, + list( + { + tmp_expr <- duckdb$expr_reference("a") + duckdb$expr_set_alias(tmp_expr, "a") + tmp_expr + }, + { + tmp_expr <- duckdb$expr_reference("b") + duckdb$expr_set_alias(tmp_expr, "b") + tmp_expr + }, + { + tmp_expr <- duckdb$expr_reference("g") + duckdb$expr_set_alias(tmp_expr, "g") + tmp_expr + }, + { + tmp_expr <- duckdb$expr_window(duckdb$expr_function("row_number", list()), list(), list(), offset_expr = NULL, default_expr = NULL) + duckdb$expr_set_alias(tmp_expr, "___row_number") + tmp_expr + } + ) + ) + rel9 <- duckdb$rel_project( + rel8, + list( + { + tmp_expr <- duckdb$expr_reference("g") + duckdb$expr_set_alias(tmp_expr, "g") + tmp_expr + }, + duckdb$expr_reference("___row_number"), + { + tmp_expr <- duckdb$expr_window( + duckdb$expr_function("row_number", list()), + list( + g = { + tmp_expr <- duckdb$expr_reference("g") + duckdb$expr_set_alias(tmp_expr, "g") + tmp_expr + } + ), + list(), + offset_expr = NULL, + default_expr = NULL + ) + duckdb$expr_set_alias(tmp_expr, "___row_number_by") + tmp_expr + } + ) + ) + rel10 <- duckdb$rel_filter( + rel9, + list( + duckdb$expr_function( + "==", + list( + duckdb$expr_reference("___row_number_by"), + if ("experimental" %in% names(formals(duckdb$expr_constant))) { + duckdb$expr_constant(1L, experimental = experimental) + } else { + duckdb$expr_constant(1L) + } + ) + ) + ) + ) + rel11 <- duckdb$rel_order(rel10, list(duckdb$expr_reference("___row_number"))) + rel12 <- duckdb$rel_project( + rel11, + list( + { + tmp_expr <- duckdb$expr_reference("g") + duckdb$expr_set_alias(tmp_expr, "g") + tmp_expr + } + ) + ) + rel12 + out <- duckdb$rel_to_altrep(rel12) + expect_equal( + out, + data.frame(g = 1:3) + ) + DBI::dbDisconnect(con, shutdown = TRUE) +}) + +test_that("relational union_all(data.frame(a = 1L, b = 7, g = 2L)) %>% distinct(g) order-preserving", { + # Autogenerated + con <- DBI::dbConnect(duckdb::duckdb()) + experimental <- FALSE + invisible(DBI::dbExecute(con, "CREATE MACRO \"==\"(x, y) AS x = y")) + df1 <- data.frame(a = seq(1, 6, by = 1), b = rep(2, 6L), g = c(1L, 2L, 2L, 3L, 3L, 3L)) + + rel1 <- duckdb$rel_from_df(con, df1, experimental = experimental) + df2 <- data.frame(a = 1L, b = 7, g = 2L) + + rel2 <- duckdb$rel_from_df(con, df2, experimental = experimental) + rel3 <- duckdb$rel_project( + rel1, + list( + { + tmp_expr <- duckdb$expr_reference("a") + duckdb$expr_set_alias(tmp_expr, "a") + tmp_expr + }, + { + tmp_expr <- duckdb$expr_reference("b") + duckdb$expr_set_alias(tmp_expr, "b") + tmp_expr + }, + { + tmp_expr <- duckdb$expr_reference("g") + duckdb$expr_set_alias(tmp_expr, "g") + tmp_expr + }, + { + tmp_expr <- duckdb$expr_window(duckdb$expr_function("row_number", list()), list(), list(), offset_expr = NULL, default_expr = NULL) + duckdb$expr_set_alias(tmp_expr, "___row_number_x") + tmp_expr + }, + { + tmp_expr <- if ("experimental" %in% names(formals(duckdb$expr_constant))) { + duckdb$expr_constant(NA_integer_, experimental = experimental) + } else { + duckdb$expr_constant(NA_integer_) + } + duckdb$expr_set_alias(tmp_expr, "___row_number_y") + tmp_expr + } + ) + ) rel4 <- duckdb$rel_project( - rel3, + rel2, list( { tmp_expr <- duckdb$expr_reference("a") @@ -1721,61 +2223,40 @@ test_that("relational union_all(data.frame(a = 1L, b = 6, g = 2L)) %>% distinct( duckdb$expr_set_alias(tmp_expr, "g") tmp_expr }, + { + tmp_expr <- if ("experimental" %in% names(formals(duckdb$expr_constant))) { + duckdb$expr_constant(NA_integer_, experimental = experimental) + } else { + duckdb$expr_constant(NA_integer_) + } + duckdb$expr_set_alias(tmp_expr, "___row_number_x") + tmp_expr + }, { tmp_expr <- duckdb$expr_window(duckdb$expr_function("row_number", list()), list(), list(), offset_expr = NULL, default_expr = NULL) - duckdb$expr_set_alias(tmp_expr, "___row_number") + duckdb$expr_set_alias(tmp_expr, "___row_number_y") tmp_expr } ) ) - rel5 <- duckdb$rel_project( - rel4, + rel5 <- duckdb$rel_union_all(rel3, rel4) + rel6 <- duckdb$rel_order( + rel5, + list(duckdb$expr_reference("___row_number_x"), duckdb$expr_reference("___row_number_y")) + ) + rel7 <- duckdb$rel_project( + rel6, list( { - tmp_expr <- duckdb$expr_reference("g") - duckdb$expr_set_alias(tmp_expr, "g") + tmp_expr <- duckdb$expr_reference("a") + duckdb$expr_set_alias(tmp_expr, "a") tmp_expr }, - duckdb$expr_reference("___row_number"), { - tmp_expr <- duckdb$expr_window( - duckdb$expr_function("row_number", list()), - list( - g = { - tmp_expr <- duckdb$expr_reference("g") - duckdb$expr_set_alias(tmp_expr, "g") - tmp_expr - } - ), - list(), - offset_expr = NULL, - default_expr = NULL - ) - duckdb$expr_set_alias(tmp_expr, "___row_number_by") + tmp_expr <- duckdb$expr_reference("b") + duckdb$expr_set_alias(tmp_expr, "b") tmp_expr - } - ) - ) - rel6 <- duckdb$rel_filter( - rel5, - list( - duckdb$expr_function( - "==", - list( - duckdb$expr_reference("___row_number_by"), - if ("experimental" %in% names(formals(duckdb$expr_constant))) { - duckdb$expr_constant(1L, experimental = experimental) - } else { - duckdb$expr_constant(1L) - } - ) - ) - ) - ) - rel7 <- duckdb$rel_order(rel6, list(duckdb$expr_reference("___row_number"))) - rel8 <- duckdb$rel_project( - rel7, - list( + }, { tmp_expr <- duckdb$expr_reference("g") duckdb$expr_set_alias(tmp_expr, "g") @@ -1783,29 +2264,8 @@ test_that("relational union_all(data.frame(a = 1L, b = 6, g = 2L)) %>% distinct( } ) ) - rel8 - out <- duckdb$rel_to_altrep(rel8) - expect_equal( - out, - data.frame(g = 1:3) - ) - DBI::dbDisconnect(con, shutdown = TRUE) -}) - -test_that("relational union_all(data.frame(a = 1L, b = 7, g = 2L)) %>% distinct(g) order-preserving", { - # Autogenerated - con <- DBI::dbConnect(duckdb::duckdb()) - experimental <- FALSE - invisible(DBI::dbExecute(con, "CREATE MACRO \"==\"(x, y) AS x = y")) - df1 <- data.frame(a = seq(1, 6, by = 1), b = rep(2, 6L), g = c(1L, 2L, 2L, 3L, 3L, 3L)) - - rel1 <- duckdb$rel_from_df(con, df1, experimental = experimental) - df2 <- data.frame(a = 1L, b = 7, g = 2L) - - rel2 <- duckdb$rel_from_df(con, df2, experimental = experimental) - rel3 <- duckdb$rel_union_all(rel1, rel2) - rel4 <- duckdb$rel_project( - rel3, + rel8 <- duckdb$rel_project( + rel7, list( { tmp_expr <- duckdb$expr_reference("a") @@ -1829,8 +2289,8 @@ test_that("relational union_all(data.frame(a = 1L, b = 7, g = 2L)) %>% distinct( } ) ) - rel5 <- duckdb$rel_project( - rel4, + rel9 <- duckdb$rel_project( + rel8, list( { tmp_expr <- duckdb$expr_reference("g") @@ -1857,8 +2317,8 @@ test_that("relational union_all(data.frame(a = 1L, b = 7, g = 2L)) %>% distinct( } ) ) - rel6 <- duckdb$rel_filter( - rel5, + rel10 <- duckdb$rel_filter( + rel9, list( duckdb$expr_function( "==", @@ -1873,9 +2333,9 @@ test_that("relational union_all(data.frame(a = 1L, b = 7, g = 2L)) %>% distinct( ) ) ) - rel7 <- duckdb$rel_order(rel6, list(duckdb$expr_reference("___row_number"))) - rel8 <- duckdb$rel_project( - rel7, + rel11 <- duckdb$rel_order(rel10, list(duckdb$expr_reference("___row_number"))) + rel12 <- duckdb$rel_project( + rel11, list( { tmp_expr <- duckdb$expr_reference("g") @@ -1884,8 +2344,8 @@ test_that("relational union_all(data.frame(a = 1L, b = 7, g = 2L)) %>% distinct( } ) ) - rel8 - out <- duckdb$rel_to_altrep(rel8) + rel12 + out <- duckdb$rel_to_altrep(rel12) expect_equal( out, data.frame(g = 1:3) @@ -10594,9 +11054,86 @@ test_that("relational union() order-preserving", { df2 <- data.frame(a = 2:5, b = rep(2, 4L)) rel2 <- duckdb$rel_from_df(con, df2, experimental = experimental) - rel3 <- duckdb$rel_union_all(rel1, rel2) + rel3 <- duckdb$rel_project( + rel1, + list( + { + tmp_expr <- duckdb$expr_reference("a") + duckdb$expr_set_alias(tmp_expr, "a") + tmp_expr + }, + { + tmp_expr <- duckdb$expr_reference("b") + duckdb$expr_set_alias(tmp_expr, "b") + tmp_expr + }, + { + tmp_expr <- duckdb$expr_window(duckdb$expr_function("row_number", list()), list(), list(), offset_expr = NULL, default_expr = NULL) + duckdb$expr_set_alias(tmp_expr, "___row_number_x") + tmp_expr + }, + { + tmp_expr <- if ("experimental" %in% names(formals(duckdb$expr_constant))) { + duckdb$expr_constant(NA_integer_, experimental = experimental) + } else { + duckdb$expr_constant(NA_integer_) + } + duckdb$expr_set_alias(tmp_expr, "___row_number_y") + tmp_expr + } + ) + ) rel4 <- duckdb$rel_project( - rel3, + rel2, + list( + { + tmp_expr <- duckdb$expr_reference("a") + duckdb$expr_set_alias(tmp_expr, "a") + tmp_expr + }, + { + tmp_expr <- duckdb$expr_reference("b") + duckdb$expr_set_alias(tmp_expr, "b") + tmp_expr + }, + { + tmp_expr <- if ("experimental" %in% names(formals(duckdb$expr_constant))) { + duckdb$expr_constant(NA_integer_, experimental = experimental) + } else { + duckdb$expr_constant(NA_integer_) + } + duckdb$expr_set_alias(tmp_expr, "___row_number_x") + tmp_expr + }, + { + tmp_expr <- duckdb$expr_window(duckdb$expr_function("row_number", list()), list(), list(), offset_expr = NULL, default_expr = NULL) + duckdb$expr_set_alias(tmp_expr, "___row_number_y") + tmp_expr + } + ) + ) + rel5 <- duckdb$rel_union_all(rel3, rel4) + rel6 <- duckdb$rel_order( + rel5, + list(duckdb$expr_reference("___row_number_x"), duckdb$expr_reference("___row_number_y")) + ) + rel7 <- duckdb$rel_project( + rel6, + list( + { + tmp_expr <- duckdb$expr_reference("a") + duckdb$expr_set_alias(tmp_expr, "a") + tmp_expr + }, + { + tmp_expr <- duckdb$expr_reference("b") + duckdb$expr_set_alias(tmp_expr, "b") + tmp_expr + } + ) + ) + rel8 <- duckdb$rel_project( + rel7, list( { tmp_expr <- duckdb$expr_reference("a") @@ -10615,8 +11152,8 @@ test_that("relational union() order-preserving", { } ) ) - rel5 <- duckdb$rel_project( - rel4, + rel9 <- duckdb$rel_project( + rel8, list( { tmp_expr <- duckdb$expr_reference("a") @@ -10653,8 +11190,8 @@ test_that("relational union() order-preserving", { } ) ) - rel6 <- duckdb$rel_filter( - rel5, + rel10 <- duckdb$rel_filter( + rel9, list( duckdb$expr_function( "==", @@ -10669,9 +11206,9 @@ test_that("relational union() order-preserving", { ) ) ) - rel7 <- duckdb$rel_order(rel6, list(duckdb$expr_reference("___row_number"))) - rel8 <- duckdb$rel_project( - rel7, + rel11 <- duckdb$rel_order(rel10, list(duckdb$expr_reference("___row_number"))) + rel12 <- duckdb$rel_project( + rel11, list( { tmp_expr <- duckdb$expr_reference("a") @@ -10685,8 +11222,8 @@ test_that("relational union() order-preserving", { } ) ) - rel8 - out <- duckdb$rel_to_altrep(rel8) + rel12 + out <- duckdb$rel_to_altrep(rel12) expect_equal( out, data.frame(a = 1:5, b = rep(2, 5L)) @@ -10730,9 +11267,86 @@ test_that("relational union_all() order-preserving", { df2 <- data.frame(a = 2:5, b = rep(2, 4L)) rel2 <- duckdb$rel_from_df(con, df2, experimental = experimental) - rel3 <- duckdb$rel_union_all(rel1, rel2) - rel3 - out <- duckdb$rel_to_altrep(rel3) + rel3 <- duckdb$rel_project( + rel1, + list( + { + tmp_expr <- duckdb$expr_reference("a") + duckdb$expr_set_alias(tmp_expr, "a") + tmp_expr + }, + { + tmp_expr <- duckdb$expr_reference("b") + duckdb$expr_set_alias(tmp_expr, "b") + tmp_expr + }, + { + tmp_expr <- duckdb$expr_window(duckdb$expr_function("row_number", list()), list(), list(), offset_expr = NULL, default_expr = NULL) + duckdb$expr_set_alias(tmp_expr, "___row_number_x") + tmp_expr + }, + { + tmp_expr <- if ("experimental" %in% names(formals(duckdb$expr_constant))) { + duckdb$expr_constant(NA_integer_, experimental = experimental) + } else { + duckdb$expr_constant(NA_integer_) + } + duckdb$expr_set_alias(tmp_expr, "___row_number_y") + tmp_expr + } + ) + ) + rel4 <- duckdb$rel_project( + rel2, + list( + { + tmp_expr <- duckdb$expr_reference("a") + duckdb$expr_set_alias(tmp_expr, "a") + tmp_expr + }, + { + tmp_expr <- duckdb$expr_reference("b") + duckdb$expr_set_alias(tmp_expr, "b") + tmp_expr + }, + { + tmp_expr <- if ("experimental" %in% names(formals(duckdb$expr_constant))) { + duckdb$expr_constant(NA_integer_, experimental = experimental) + } else { + duckdb$expr_constant(NA_integer_) + } + duckdb$expr_set_alias(tmp_expr, "___row_number_x") + tmp_expr + }, + { + tmp_expr <- duckdb$expr_window(duckdb$expr_function("row_number", list()), list(), list(), offset_expr = NULL, default_expr = NULL) + duckdb$expr_set_alias(tmp_expr, "___row_number_y") + tmp_expr + } + ) + ) + rel5 <- duckdb$rel_union_all(rel3, rel4) + rel6 <- duckdb$rel_order( + rel5, + list(duckdb$expr_reference("___row_number_x"), duckdb$expr_reference("___row_number_y")) + ) + rel7 <- duckdb$rel_project( + rel6, + list( + { + tmp_expr <- duckdb$expr_reference("a") + duckdb$expr_set_alias(tmp_expr, "a") + tmp_expr + }, + { + tmp_expr <- duckdb$expr_reference("b") + duckdb$expr_set_alias(tmp_expr, "b") + tmp_expr + } + ) + ) + rel7 + out <- duckdb$rel_to_altrep(rel7) expect_equal( out, data.frame(a = c(1L, 2L, 3L, 4L, 2L, 3L, 4L, 5L), b = rep(2, 8L)) diff --git a/tools/00-funs.R b/tools/00-funs.R index 79a35e00..85989a85 100644 --- a/tools/00-funs.R +++ b/tools/00-funs.R @@ -43,6 +43,7 @@ df_methods <- ))) %>% mutate(code = unname(mget(fun, dplyr))) +# FIXME: c(a = list(...), NULL) instead of head(...) duckplyr_tests <- head(n = -1, list( "helper-s3.R" = c( NULL From 35345bba300e38282791adb06f713dec40d709f2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kirill=20M=C3=BCller?= Date: Thu, 7 Sep 2023 11:19:08 +0200 Subject: [PATCH 3/3] Add duckdb remote --- DESCRIPTION | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index f48b84d2..f56b2bb8 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -20,7 +20,7 @@ Imports: collections, DBI, dplyr (>= 1.1.3), - duckdb (>= 0.8.1-3), + duckdb (>= 0.8.1-9000), glue, lifecycle, purrr, @@ -46,6 +46,8 @@ Suggests: styler, testthat (>= 3.1.5), withr +Remotes: + duckdb=duckdb/duckdb-r Config/testthat/edition: 3 Config/testthat/parallel: false Config/testthat/start-first: as_duckplyr_df, mutate, filter, count-tally