Skip to content

Commit

Permalink
chore: Add telemetry for joins
Browse files: browse the repository at this point in the history
  • Loading branch information
krlmlr committed Mar 8, 2024
1 parent 7e7125f commit 2438c2b
Show file tree
Hide file tree
Showing 9 changed files with 208 additions and 6 deletions.
2 changes: 1 addition & 1 deletion R/anti_join.R
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ anti_join.duckplyr_df <- function(x, y, by = NULL, copy = FALSE, ..., na_matches
na_matches <- check_na_matches(na_matches, error_call = error_call)

# Our implementation
rel_try(
rel_try(call = list(name = "anti_join", x = x, y = y, args = list(by = if(!is.null(by)) as_join_by(by), copy = copy, na_matches = na_matches)),
"No relational implementation for anti_join(copy = TRUE)" = copy,
{
out <- rel_join_impl(x, y, by, "anti", na_matches, error_call = error_call)
Expand Down
2 changes: 1 addition & 1 deletion R/full_join.R
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ full_join.duckplyr_df <- function(x, y, by = NULL, copy = FALSE, suffix = c(".x"
error_call <- caller_env()

# Our implementation
rel_try(
rel_try(call = list(name = "full_join", x = x, y = y, args = list(by = if(!is.null(by)) as_join_by(by), copy = copy, keep = keep, na_matches = na_matches, multiple = multiple, relationship = relationship)),
"No implicit cross joins for full_join()" = is_cross_by(by),
"No relational implementation for full_join(copy = TRUE)" = copy,
{
Expand Down
2 changes: 1 addition & 1 deletion R/inner_join.R
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ inner_join.duckplyr_df <- function(x, y, by = NULL, copy = FALSE, suffix = c(".x
error_call <- caller_env()

# Our implementation
rel_try(
rel_try(call = list(name = "inner_join", x = x, y = y, args = list(by = if(!is.null(by)) as_join_by(by), copy = copy, keep = keep, na_matches = na_matches, multiple = multiple, unmatched = unmatched, relationship = relationship)),
"No implicit cross joins for inner_join()" = is_cross_by(by),
"No relational implementation for inner_join(copy = TRUE)" = copy,
{
Expand Down
2 changes: 1 addition & 1 deletion R/left_join.R
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ left_join.duckplyr_df <- function(x, y, by = NULL, copy = FALSE, suffix = c(".x"
error_call <- caller_env()

# Our implementation
rel_try(
rel_try(call = list(name = "left_join", x = x, y = y, args = list(by = if(!is.null(by)) as_join_by(by), copy = copy, keep = keep, na_matches = na_matches, multiple = multiple, unmatched = unmatched, relationship = relationship)),
"No implicit cross joins for left_join()" = is_cross_by(by),
"No relational implementation for left_join(copy = TRUE)" = copy,
{
Expand Down
2 changes: 1 addition & 1 deletion R/right_join.R
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ right_join.duckplyr_df <- function(x, y, by = NULL, copy = FALSE, suffix = c(".x
error_call <- caller_env()

# Our implementation
rel_try(
rel_try(call = list(name = "right_join", x = x, y = y, args = list(by = if(!is.null(by)) as_join_by(by), copy = copy, keep = keep, na_matches = na_matches, multiple = multiple, unmatched = unmatched, relationship = relationship)),
"No implicit cross joins for right_join()" = is_cross_by(by),
"No relational implementation for right_join(copy = TRUE)" = copy,
{
Expand Down
2 changes: 1 addition & 1 deletion R/semi_join.R
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ semi_join.duckplyr_df <- function(x, y, by = NULL, copy = FALSE, ..., na_matches
na_matches <- check_na_matches(na_matches, error_call = error_call)

# Our implementation
rel_try(
rel_try(call = list(name = "semi_join", x = x, y = y, args = list(by = if(!is.null(by)) as_join_by(by), copy = copy, na_matches = na_matches)),
"No relational implementation for semi_join(copy = TRUE)" = copy,
{
out <- rel_join_impl(x, y, by, "semi", na_matches, error_call = error_call)
Expand Down
7 changes: 7 additions & 0 deletions R/telemetry.R
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,13 @@ arg_to_json <- function(x, name_map) {
quo_to_json(x, name_map)
} else if (is_call(x) || is_symbol(x)) {
expr_to_json(x, name_map)
} else if (inherits(x, "dplyr_join_by")) {
list(
condition = x$condition,
filter = x$filter,
x = arg_to_json(syms(x$x), name_map),
y = arg_to_json(syms(x$y), name_map)
)
} else if (is.list(x)) {
map(x, ~ arg_to_json(.x, name_map))
} else {
Expand Down
76 changes: 76 additions & 0 deletions tests/testthat/_snaps/telemetry.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,30 @@
# telemetry and anti_join()

Code
tibble(a = 1:3, b = 4:6) %>% as_duckplyr_df() %>% anti_join(tibble(a = 1:3, b = 4:
6), by = "a", copy = TRUE, na_matches = "na")
Condition
Error in `rel_try()`:
! anti_join: {"message":"Error in anti_join","name":"anti_join","x":{"...1":"integer","...2":"integer"},"y":{"...1":"integer","...2":"integer"},"args":{"by":{"condition":"==","filter":"none","x":["...1"],"y":["...1"]},"copy":true,"na_matches":"na"}}

---

Code
tibble(a = 1:3, b = 4:6) %>% as_duckplyr_df() %>% anti_join(tibble(a = 1:3, b = 4:
6), by = c(a = "b"), copy = TRUE, na_matches = "na")
Condition
Error in `rel_try()`:
! anti_join: {"message":"Error in anti_join","name":"anti_join","x":{"...1":"integer","...2":"integer"},"y":{"...1":"integer","...2":"integer"},"args":{"by":{"condition":"==","filter":"none","x":["...1"],"y":["...2"]},"copy":true,"na_matches":"na"}}

---

Code
tibble(a = 1:3, b = 4:6) %>% as_duckplyr_df() %>% anti_join(tibble(a = 1:3, b = 4:
6), by = join_by(a == b), copy = FALSE, na_matches = "never")
Condition
Error in `rel_try()`:
! anti_join: {"message":"Error in anti_join","name":"anti_join","x":{"...1":"integer","...2":"integer"},"y":{"...1":"integer","...2":"integer"},"args":{"by":{"condition":"==","filter":"none","x":["...1"],"y":["...2"]},"copy":false,"na_matches":"never"}}

# telemetry and arrange()

Code
Expand Down Expand Up @@ -39,6 +66,26 @@
Error in `rel_try()`:
! filter: {"message":"Error in filter","name":"filter","x":{"...1":"integer","...2":"integer"},"args":{"dots":{"1":"...1 > \"Don't know how to scrub numeric\""},"by":"\"Don't know how to scrub NULL\"","preserve":true}}

# telemetry and full_join()

Code
tibble(a = 1:3, b = 4:6) %>% as_duckplyr_df() %>% full_join(tibble(a = 1:3, b = 4:
6), by = "a", copy = TRUE, suffix = c("x", "y"), keep = TRUE, na_matches = "na",
multiple = "all", relationship = "one-to-one")
Condition
Error in `rel_try()`:
! full_join: {"message":"Error in full_join","name":"full_join","x":{"...1":"integer","...2":"integer"},"y":{"...1":"integer","...2":"integer"},"args":{"by":{"condition":"==","filter":"none","x":["...1"],"y":["...1"]},"copy":true,"keep":true,"na_matches":"na","multiple":"all","relationship":"one-to-one"}}

# telemetry and inner_join()

Code
tibble(a = 1:3, b = 4:6) %>% as_duckplyr_df() %>% inner_join(tibble(a = 1:3, b = 4:
6), by = "a", copy = TRUE, suffix = c("x", "y"), keep = TRUE, na_matches = "na",
multiple = "all", unmatched = "error", relationship = "one-to-one")
Condition
Error in `rel_try()`:
! inner_join: {"message":"Error in inner_join","name":"inner_join","x":{"...1":"integer","...2":"integer"},"y":{"...1":"integer","...2":"integer"},"args":{"by":{"condition":"==","filter":"none","x":["...1"],"y":["...1"]},"copy":true,"keep":true,"na_matches":"na","multiple":"all","unmatched":"error","relationship":"one-to-one"}}

# telemetry and intersect()

Code
Expand All @@ -48,6 +95,16 @@
Error in `rel_try()`:
! intersect: {"message":"Error in intersect","name":"intersect","x":{"...1":"integer","...2":"integer"},"y":{"...1":"integer","...2":"integer"}}

# telemetry and left_join()

Code
tibble(a = 1:3, b = 4:6) %>% as_duckplyr_df() %>% left_join(tibble(a = 1:3, b = 4:
6), by = "a", copy = TRUE, suffix = c("x", "y"), keep = TRUE, na_matches = "na",
multiple = "all", unmatched = "error", relationship = "one-to-one")
Condition
Error in `rel_try()`:
! left_join: {"message":"Error in left_join","name":"left_join","x":{"...1":"integer","...2":"integer"},"y":{"...1":"integer","...2":"integer"},"args":{"by":{"condition":"==","filter":"none","x":["...1"],"y":["...1"]},"copy":true,"keep":true,"na_matches":"na","multiple":"all","unmatched":"error","relationship":"one-to-one"}}

# telemetry and mutate()

Code
Expand All @@ -73,6 +130,16 @@
Error in `rel_try()`:
! rename: {"message":"Error in rename","name":"rename","x":{"...1":"integer","...2":"integer"},"args":{"dots":{"...3":"...1"}}}

# telemetry and right_join()

Code
tibble(a = 1:3, b = 4:6) %>% as_duckplyr_df() %>% right_join(tibble(a = 1:3, b = 4:
6), by = "a", copy = TRUE, suffix = c("x", "y"), keep = TRUE, na_matches = "na",
multiple = "all", unmatched = "error", relationship = "one-to-one")
Condition
Error in `rel_try()`:
! right_join: {"message":"Error in right_join","name":"right_join","x":{"...1":"integer","...2":"integer"},"y":{"...1":"integer","...2":"integer"},"args":{"by":{"condition":"==","filter":"none","x":["...1"],"y":["...1"]},"copy":true,"keep":true,"na_matches":"na","multiple":"all","unmatched":"error","relationship":"one-to-one"}}

# telemetry and select()

Code
Expand All @@ -81,6 +148,15 @@
Error in `rel_try()`:
! select: {"message":"Error in select","name":"select","x":{"...1":"integer","...2":"integer"},"args":{"dots":{"...3":"...2"}}}

# telemetry and semi_join()

Code
tibble(a = 1:3, b = 4:6) %>% as_duckplyr_df() %>% semi_join(tibble(a = 1:3, b = 4:
6), by = "a", copy = TRUE, na_matches = "na")
Condition
Error in `rel_try()`:
! semi_join: {"message":"Error in semi_join","name":"semi_join","x":{"...1":"integer","...2":"integer"},"y":{"...1":"integer","...2":"integer"},"args":{"by":{"condition":"==","filter":"none","x":["...1"],"y":["...1"]},"copy":true,"na_matches":"na"}}

# telemetry and setdiff()

Code
Expand Down
119 changes: 119 additions & 0 deletions tests/testthat/test-telemetry.R
Original file line number Diff line number Diff line change
@@ -1,5 +1,40 @@
withr::local_envvar(DUCKPLYR_TELEMETRY_TEST = TRUE)

# Snapshot tests for the telemetry payload of anti_join(), covering three
# spellings of `by`. Every call is expected to fail (error = TRUE); the
# recorded snapshots show the error raised from rel_try() with a JSON
# description of the call in which column names appear as "...1", "...2".
# NOTE(review): presumably the unconditional error is triggered by the
# DUCKPLYR_TELEMETRY_TEST envvar set at the top of this file -- confirm in
# rel_try().
test_that("telemetry and anti_join()", {
# Case 1: `by` as a single column name present in both tables.
expect_snapshot(error = TRUE, {
tibble(a = 1:3, b = 4:6) %>%
as_duckplyr_df() %>%
anti_join(
tibble(a = 1:3, b = 4:6),
by = "a",
copy = TRUE,
na_matches = "na"
)
})

# Case 2: `by` as a named character vector (x column "a" matched to y
# column "b"); the recorded payload lists different columns for x and y.
expect_snapshot(error = TRUE, {
tibble(a = 1:3, b = 4:6) %>%
as_duckplyr_df() %>%
anti_join(
tibble(a = 1:3, b = 4:6),
by = c("a" = "b"),
copy = TRUE,
na_matches = "na"
)
})

# Case 3: `by` as a join_by() specification, with copy = FALSE and
# na_matches = "never". The recorded snapshot still shows an error from
# rel_try() even though copy is FALSE.
expect_snapshot(error = TRUE, {
tibble(a = 1:3, b = 4:6) %>%
as_duckplyr_df() %>%
anti_join(
tibble(a = 1:3, b = 4:6),
by = join_by(a == b),
copy = FALSE,
na_matches = "never"
)
})
})

test_that("telemetry and arrange()", {
expect_snapshot(error = TRUE, {
tibble(a = 1:3, b = 4:6) %>%
Expand Down Expand Up @@ -38,6 +73,41 @@ test_that("telemetry and filter()", {
})
})

# Snapshot test for the telemetry payload of full_join(). The call is
# expected to fail (error = TRUE); the recorded snapshot shows the error
# raised from rel_try() with a JSON description of the call.
test_that("telemetry and full_join()", {
# Non-default values are passed for every argument. Note that `suffix` is
# supplied here, but the recorded payload's "args" has no suffix entry.
expect_snapshot(error = TRUE, {
tibble(a = 1:3, b = 4:6) %>%
as_duckplyr_df() %>%
full_join(
tibble(a = 1:3, b = 4:6),
by = "a",
copy = TRUE,
suffix = c("x", "y"),
keep = TRUE,
na_matches = "na",
multiple = "all",
relationship = "one-to-one"
)
})
})

# Snapshot test for the telemetry payload of inner_join(). The call is
# expected to fail (error = TRUE); the recorded snapshot shows the error
# raised from rel_try() with a JSON description of the call.
test_that("telemetry and inner_join()", {
# Non-default values are passed for every argument, including `unmatched`.
# `suffix` is supplied here, but the recorded payload's "args" has no
# suffix entry.
expect_snapshot(error = TRUE, {
tibble(a = 1:3, b = 4:6) %>%
as_duckplyr_df() %>%
inner_join(
tibble(a = 1:3, b = 4:6),
by = "a",
copy = TRUE,
suffix = c("x", "y"),
keep = TRUE,
na_matches = "na",
multiple = "all",
unmatched = "error",
relationship = "one-to-one"
)
})
})

test_that("telemetry and intersect()", {
expect_snapshot(error = TRUE, {
tibble(a = 1:3, b = 4:6) %>%
Expand All @@ -46,6 +116,24 @@ test_that("telemetry and intersect()", {
})
})

# Snapshot test for the telemetry payload of left_join(). The call is
# expected to fail (error = TRUE); the recorded snapshot shows the error
# raised from rel_try() with a JSON description of the call.
test_that("telemetry and left_join()", {
# Non-default values are passed for every argument, including `unmatched`.
# `suffix` is supplied here, but the recorded payload's "args" has no
# suffix entry.
expect_snapshot(error = TRUE, {
tibble(a = 1:3, b = 4:6) %>%
as_duckplyr_df() %>%
left_join(
tibble(a = 1:3, b = 4:6),
by = "a",
copy = TRUE,
suffix = c("x", "y"),
keep = TRUE,
na_matches = "na",
multiple = "all",
unmatched = "error",
relationship = "one-to-one"
)
})
})

test_that("telemetry and mutate()", {
expect_snapshot(error = TRUE, {
tibble(a = 1:3, b = 4:6) %>%
Expand All @@ -70,6 +158,24 @@ test_that("telemetry and rename()", {
})
})

# Snapshot test for the telemetry payload of right_join(). The call is
# expected to fail (error = TRUE); the recorded snapshot shows the error
# raised from rel_try() with a JSON description of the call.
test_that("telemetry and right_join()", {
# Non-default values are passed for every argument, including `unmatched`.
# `suffix` is supplied here, but the recorded payload's "args" has no
# suffix entry.
expect_snapshot(error = TRUE, {
tibble(a = 1:3, b = 4:6) %>%
as_duckplyr_df() %>%
right_join(
tibble(a = 1:3, b = 4:6),
by = "a",
copy = TRUE,
suffix = c("x", "y"),
keep = TRUE,
na_matches = "na",
multiple = "all",
unmatched = "error",
relationship = "one-to-one"
)
})
})

test_that("telemetry and select()", {
expect_snapshot(error = TRUE, {
tibble(a = 1:3, b = 4:6) %>%
Expand All @@ -78,6 +184,19 @@ test_that("telemetry and select()", {
})
})

# Snapshot test for the telemetry payload of semi_join(). The call is
# expected to fail (error = TRUE); the recorded snapshot shows the error
# raised from rel_try() with a JSON description of the call.
test_that("telemetry and semi_join()", {
# `by` is a single column name present in both tables; `copy` and
# `na_matches` are set explicitly and both appear in the recorded payload.
expect_snapshot(error = TRUE, {
tibble(a = 1:3, b = 4:6) %>%
as_duckplyr_df() %>%
semi_join(
tibble(a = 1:3, b = 4:6),
by = "a",
copy = TRUE,
na_matches = "na"
)
})
})

test_that("telemetry and setdiff()", {
expect_snapshot(error = TRUE, {
tibble(a = 1:3, b = 4:6) %>%
Expand Down

0 comments on commit 2438c2b

Please sign in to comment.