From 2438c2bcb67eee84fee84800ddb6412d25794bdb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kirill=20M=C3=BCller?= Date: Fri, 8 Mar 2024 05:30:50 +0100 Subject: [PATCH] chore: Add telemetry for joins --- R/anti_join.R | 2 +- R/full_join.R | 2 +- R/inner_join.R | 2 +- R/left_join.R | 2 +- R/right_join.R | 2 +- R/semi_join.R | 2 +- R/telemetry.R | 7 ++ tests/testthat/_snaps/telemetry.md | 76 ++++++++++++++++++ tests/testthat/test-telemetry.R | 119 +++++++++++++++++++++++++++++ 9 files changed, 208 insertions(+), 6 deletions(-) diff --git a/R/anti_join.R b/R/anti_join.R index 2ba21154..2d4d7ee2 100644 --- a/R/anti_join.R +++ b/R/anti_join.R @@ -8,7 +8,7 @@ anti_join.duckplyr_df <- function(x, y, by = NULL, copy = FALSE, ..., na_matches na_matches <- check_na_matches(na_matches, error_call = error_call) # Our implementation - rel_try( + rel_try(call = list(name = "anti_join", x = x, y = y, args = list(by = if(!is.null(by)) as_join_by(by), copy = copy, na_matches = na_matches)), "No relational implementation for anti_join(copy = TRUE)" = copy, { out <- rel_join_impl(x, y, by, "anti", na_matches, error_call = error_call) diff --git a/R/full_join.R b/R/full_join.R index 53a41e5b..2e7606ff 100644 --- a/R/full_join.R +++ b/R/full_join.R @@ -5,7 +5,7 @@ full_join.duckplyr_df <- function(x, y, by = NULL, copy = FALSE, suffix = c(".x" error_call <- caller_env() # Our implementation - rel_try( + rel_try(call = list(name = "full_join", x = x, y = y, args = list(by = if(!is.null(by)) as_join_by(by), copy = copy, keep = keep, na_matches = na_matches, multiple = multiple, relationship = relationship)), "No implicit cross joins for full_join()" = is_cross_by(by), "No relational implementation for full_join(copy = TRUE)" = copy, { diff --git a/R/inner_join.R b/R/inner_join.R index de92416c..3e76193b 100644 --- a/R/inner_join.R +++ b/R/inner_join.R @@ -5,7 +5,7 @@ inner_join.duckplyr_df <- function(x, y, by = NULL, copy = FALSE, suffix = c(".x error_call <- caller_env() # Our implementation - rel_try( + rel_try(call = list(name = "inner_join", x = x, y = y, args = list(by = if(!is.null(by)) as_join_by(by), copy = copy, keep = keep, na_matches = na_matches, multiple = multiple, unmatched = unmatched, relationship = relationship)), "No implicit cross joins for inner_join()" = is_cross_by(by), "No relational implementation for inner_join(copy = TRUE)" = copy, { diff --git a/R/left_join.R b/R/left_join.R index c0febb7f..60c2a4a7 100644 --- a/R/left_join.R +++ b/R/left_join.R @@ -5,7 +5,7 @@ left_join.duckplyr_df <- function(x, y, by = NULL, copy = FALSE, suffix = c(".x" error_call <- caller_env() # Our implementation - rel_try( + rel_try(call = list(name = "left_join", x = x, y = y, args = list(by = if(!is.null(by)) as_join_by(by), copy = copy, keep = keep, na_matches = na_matches, multiple = multiple, unmatched = unmatched, relationship = relationship)), "No implicit cross joins for left_join()" = is_cross_by(by), "No relational implementation for left_join(copy = TRUE)" = copy, { diff --git a/R/right_join.R b/R/right_join.R index 13918d3b..ec1e4741 100644 --- a/R/right_join.R +++ b/R/right_join.R @@ -5,7 +5,7 @@ right_join.duckplyr_df <- function(x, y, by = NULL, copy = FALSE, suffix = c(".x error_call <- caller_env() # Our implementation - rel_try( + rel_try(call = list(name = "right_join", x = x, y = y, args = list(by = if(!is.null(by)) as_join_by(by), copy = copy, keep = keep, na_matches = na_matches, multiple = multiple, unmatched = unmatched, relationship = relationship)), "No implicit cross joins for right_join()" = is_cross_by(by), "No relational implementation for right_join(copy = TRUE)" = copy, { diff --git a/R/semi_join.R b/R/semi_join.R index 7e2f47b8..3cbfef13 100644 --- a/R/semi_join.R +++ b/R/semi_join.R @@ -8,7 +8,7 @@ semi_join.duckplyr_df <- function(x, y, by = NULL, copy = FALSE, ..., na_matches na_matches <- check_na_matches(na_matches, error_call = error_call) # Our implementation - rel_try( + rel_try(call = list(name = "semi_join", x = x, y = y, args = list(by = if(!is.null(by)) as_join_by(by), copy = copy, na_matches = na_matches)), "No relational implementation for semi_join(copy = TRUE)" = copy, { out <- rel_join_impl(x, y, by, "semi", na_matches, error_call = error_call) diff --git a/R/telemetry.R b/R/telemetry.R index d9061a88..0696cc90 100644 --- a/R/telemetry.R +++ b/R/telemetry.R @@ -44,6 +44,13 @@ arg_to_json <- function(x, name_map) { quo_to_json(x, name_map) } else if (is_call(x) || is_symbol(x)) { expr_to_json(x, name_map) + } else if (inherits(x, "dplyr_join_by")) { + list( + condition = x$condition, + filter = x$filter, + x = arg_to_json(syms(x$x), name_map), + y = arg_to_json(syms(x$y), name_map) + ) } else if (is.list(x)) { map(x, ~ arg_to_json(.x, name_map)) } else { diff --git a/tests/testthat/_snaps/telemetry.md b/tests/testthat/_snaps/telemetry.md index 4ba6cb03..b52eaddf 100644 --- a/tests/testthat/_snaps/telemetry.md +++ b/tests/testthat/_snaps/telemetry.md @@ -1,3 +1,30 @@ +# telemetry and anti_join() + + Code + tibble(a = 1:3, b = 4:6) %>% as_duckplyr_df() %>% anti_join(tibble(a = 1:3, b = 4: + 6), by = "a", copy = TRUE, na_matches = "na") + Condition + Error in `rel_try()`: + ! anti_join: {"message":"Error in anti_join","name":"anti_join","x":{"...1":"integer","...2":"integer"},"y":{"...1":"integer","...2":"integer"},"args":{"by":{"condition":"==","filter":"none","x":["...1"],"y":["...1"]},"copy":true,"na_matches":"na"}} + +--- + + Code + tibble(a = 1:3, b = 4:6) %>% as_duckplyr_df() %>% anti_join(tibble(a = 1:3, b = 4: + 6), by = c(a = "b"), copy = TRUE, na_matches = "na") + Condition + Error in `rel_try()`: + ! anti_join: {"message":"Error in anti_join","name":"anti_join","x":{"...1":"integer","...2":"integer"},"y":{"...1":"integer","...2":"integer"},"args":{"by":{"condition":"==","filter":"none","x":["...1"],"y":["...2"]},"copy":true,"na_matches":"na"}} + +--- + + Code + tibble(a = 1:3, b = 4:6) %>% as_duckplyr_df() %>% anti_join(tibble(a = 1:3, b = 4: + 6), by = join_by(a == b), copy = FALSE, na_matches = "never") + Condition + Error in `rel_try()`: + ! anti_join: {"message":"Error in anti_join","name":"anti_join","x":{"...1":"integer","...2":"integer"},"y":{"...1":"integer","...2":"integer"},"args":{"by":{"condition":"==","filter":"none","x":["...1"],"y":["...2"]},"copy":false,"na_matches":"never"}} + # telemetry and arrange() Code @@ -39,6 +66,26 @@ Error in `rel_try()`: ! filter: {"message":"Error in filter","name":"filter","x":{"...1":"integer","...2":"integer"},"args":{"dots":{"1":"...1 > \"Don't know how to scrub numeric\""},"by":"\"Don't know how to scrub NULL\"","preserve":true}} +# telemetry and full_join() + + Code + tibble(a = 1:3, b = 4:6) %>% as_duckplyr_df() %>% full_join(tibble(a = 1:3, b = 4: + 6), by = "a", copy = TRUE, suffix = c("x", "y"), keep = TRUE, na_matches = "na", + multiple = "all", relationship = "one-to-one") + Condition + Error in `rel_try()`: + ! full_join: {"message":"Error in full_join","name":"full_join","x":{"...1":"integer","...2":"integer"},"y":{"...1":"integer","...2":"integer"},"args":{"by":{"condition":"==","filter":"none","x":["...1"],"y":["...1"]},"copy":true,"keep":true,"na_matches":"na","multiple":"all","relationship":"one-to-one"}} + +# telemetry and inner_join() + + Code + tibble(a = 1:3, b = 4:6) %>% as_duckplyr_df() %>% inner_join(tibble(a = 1:3, b = 4: + 6), by = "a", copy = TRUE, suffix = c("x", "y"), keep = TRUE, na_matches = "na", + multiple = "all", unmatched = "error", relationship = "one-to-one") + Condition + Error in `rel_try()`: + ! inner_join: {"message":"Error in inner_join","name":"inner_join","x":{"...1":"integer","...2":"integer"},"y":{"...1":"integer","...2":"integer"},"args":{"by":{"condition":"==","filter":"none","x":["...1"],"y":["...1"]},"copy":true,"keep":true,"na_matches":"na","multiple":"all","unmatched":"error","relationship":"one-to-one"}} + # telemetry and intersect() Code @@ -48,6 +95,16 @@ Error in `rel_try()`: ! intersect: {"message":"Error in intersect","name":"intersect","x":{"...1":"integer","...2":"integer"},"y":{"...1":"integer","...2":"integer"}} +# telemetry and left_join() + + Code + tibble(a = 1:3, b = 4:6) %>% as_duckplyr_df() %>% left_join(tibble(a = 1:3, b = 4: + 6), by = "a", copy = TRUE, suffix = c("x", "y"), keep = TRUE, na_matches = "na", + multiple = "all", unmatched = "error", relationship = "one-to-one") + Condition + Error in `rel_try()`: + ! left_join: {"message":"Error in left_join","name":"left_join","x":{"...1":"integer","...2":"integer"},"y":{"...1":"integer","...2":"integer"},"args":{"by":{"condition":"==","filter":"none","x":["...1"],"y":["...1"]},"copy":true,"keep":true,"na_matches":"na","multiple":"all","unmatched":"error","relationship":"one-to-one"}} + # telemetry and mutate() Code @@ -73,6 +130,16 @@ Error in `rel_try()`: ! rename: {"message":"Error in rename","name":"rename","x":{"...1":"integer","...2":"integer"},"args":{"dots":{"...3":"...1"}}} +# telemetry and right_join() + + Code + tibble(a = 1:3, b = 4:6) %>% as_duckplyr_df() %>% right_join(tibble(a = 1:3, b = 4: + 6), by = "a", copy = TRUE, suffix = c("x", "y"), keep = TRUE, na_matches = "na", + multiple = "all", unmatched = "error", relationship = "one-to-one") + Condition + Error in `rel_try()`: + ! right_join: {"message":"Error in right_join","name":"right_join","x":{"...1":"integer","...2":"integer"},"y":{"...1":"integer","...2":"integer"},"args":{"by":{"condition":"==","filter":"none","x":["...1"],"y":["...1"]},"copy":true,"keep":true,"na_matches":"na","multiple":"all","unmatched":"error","relationship":"one-to-one"}} + # telemetry and select() Code @@ -81,6 +148,15 @@ Error in `rel_try()`: ! select: {"message":"Error in select","name":"select","x":{"...1":"integer","...2":"integer"},"args":{"dots":{"...3":"...2"}}} +# telemetry and semi_join() + + Code + tibble(a = 1:3, b = 4:6) %>% as_duckplyr_df() %>% semi_join(tibble(a = 1:3, b = 4: + 6), by = "a", copy = TRUE, na_matches = "na") + Condition + Error in `rel_try()`: + ! semi_join: {"message":"Error in semi_join","name":"semi_join","x":{"...1":"integer","...2":"integer"},"y":{"...1":"integer","...2":"integer"},"args":{"by":{"condition":"==","filter":"none","x":["...1"],"y":["...1"]},"copy":true,"na_matches":"na"}} + # telemetry and setdiff() Code diff --git a/tests/testthat/test-telemetry.R b/tests/testthat/test-telemetry.R index dfdc60bd..08fa7341 100644 --- a/tests/testthat/test-telemetry.R +++ b/tests/testthat/test-telemetry.R @@ -1,5 +1,40 @@ withr::local_envvar(DUCKPLYR_TELEMETRY_TEST = TRUE) +test_that("telemetry and anti_join()", { + expect_snapshot(error = TRUE, { + tibble(a = 1:3, b = 4:6) %>% + as_duckplyr_df() %>% + anti_join( + tibble(a = 1:3, b = 4:6), + by = "a", + copy = TRUE, + na_matches = "na" + ) + }) + + expect_snapshot(error = TRUE, { + tibble(a = 1:3, b = 4:6) %>% + as_duckplyr_df() %>% + anti_join( + tibble(a = 1:3, b = 4:6), + by = c("a" = "b"), + copy = TRUE, + na_matches = "na" + ) + }) + + expect_snapshot(error = TRUE, { + tibble(a = 1:3, b = 4:6) %>% + as_duckplyr_df() %>% + anti_join( + tibble(a = 1:3, b = 4:6), + by = join_by(a == b), + copy = FALSE, + na_matches = "never" + ) + }) +}) + test_that("telemetry and arrange()", { expect_snapshot(error = TRUE, { tibble(a = 1:3, b = 4:6) %>% @@ -38,6 +73,41 @@ test_that("telemetry and filter()", { }) }) +test_that("telemetry and full_join()", { + expect_snapshot(error = TRUE, { + tibble(a = 1:3, b = 4:6) %>% + as_duckplyr_df() %>% + full_join( + tibble(a = 1:3, b = 4:6), + by = "a", + copy = TRUE, + suffix = c("x", "y"), + keep = TRUE, + na_matches = "na", + multiple = "all", + relationship = "one-to-one" + ) + }) +}) + +test_that("telemetry and inner_join()", { + expect_snapshot(error = TRUE, { + tibble(a = 1:3, b = 4:6) %>% + as_duckplyr_df() %>% + inner_join( + tibble(a = 1:3, b = 4:6), + by = "a", + copy = TRUE, + suffix = c("x", "y"), + keep = TRUE, + na_matches = "na", + multiple = "all", + unmatched = "error", + relationship = "one-to-one" + ) + }) +}) + test_that("telemetry and intersect()", { expect_snapshot(error = TRUE, { tibble(a = 1:3, b = 4:6) %>% @@ -46,6 +116,24 @@ test_that("telemetry and intersect()", { }) }) +test_that("telemetry and left_join()", { + expect_snapshot(error = TRUE, { + tibble(a = 1:3, b = 4:6) %>% + as_duckplyr_df() %>% + left_join( + tibble(a = 1:3, b = 4:6), + by = "a", + copy = TRUE, + suffix = c("x", "y"), + keep = TRUE, + na_matches = "na", + multiple = "all", + unmatched = "error", + relationship = "one-to-one" + ) + }) +}) + test_that("telemetry and mutate()", { expect_snapshot(error = TRUE, { tibble(a = 1:3, b = 4:6) %>% @@ -70,6 +158,24 @@ test_that("telemetry and rename()", { }) }) +test_that("telemetry and right_join()", { + expect_snapshot(error = TRUE, { + tibble(a = 1:3, b = 4:6) %>% + as_duckplyr_df() %>% + right_join( + tibble(a = 1:3, b = 4:6), + by = "a", + copy = TRUE, + suffix = c("x", "y"), + keep = TRUE, + na_matches = "na", + multiple = "all", + unmatched = "error", + relationship = "one-to-one" + ) + }) +}) + test_that("telemetry and select()", { expect_snapshot(error = TRUE, { tibble(a = 1:3, b = 4:6) %>% @@ -78,6 +184,19 @@ test_that("telemetry and select()", { }) }) +test_that("telemetry and semi_join()", { + expect_snapshot(error = TRUE, { + tibble(a = 1:3, b = 4:6) %>% + as_duckplyr_df() %>% + semi_join( + tibble(a = 1:3, b = 4:6), + by = "a", + copy = TRUE, + na_matches = "na" + ) + }) +}) + test_that("telemetry and setdiff()", { expect_snapshot(error = TRUE, { tibble(a = 1:3, b = 4:6) %>%