Skip to content

Commit

Permalink
Merge pull request #185 from duckdblabs/f-join-ptype
Browse files Browse the repository at this point in the history
  • Loading branch information
krlmlr authored Jul 10, 2024
2 parents 2cde0dc + a18c0ff commit 124163e
Show file tree
Hide file tree
Showing 8 changed files with 62 additions and 15 deletions.
3 changes: 3 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -412,6 +412,9 @@ importFrom(vctrs,vec_check_size)
importFrom(vctrs,vec_data)
importFrom(vctrs,vec_in)
importFrom(vctrs,vec_match)
importFrom(vctrs,vec_ptype)
importFrom(vctrs,vec_ptype2)
importFrom(vctrs,vec_ptype_finalise)
importFrom(vctrs,vec_rbind)
importFrom(vctrs,vec_recycle_common)
importFrom(vctrs,vec_rep)
Expand Down
1 change: 1 addition & 0 deletions R/dplyr.R
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@ join_rows <- dplyr$join_rows
mutate_cols <- dplyr$mutate_cols
mutate_keep <- dplyr$mutate_keep
named_args <- dplyr$named_args
rethrow_error_join_incompatible_type <- dplyr$rethrow_error_join_incompatible_type
rows_bind <- dplyr$rows_bind
rows_cast_y <- dplyr$rows_cast_y
rows_check_by <- dplyr$rows_check_by
Expand Down
3 changes: 3 additions & 0 deletions R/duckplyr-package.R
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,9 @@
#' @importFrom vctrs vec_data
#' @importFrom vctrs vec_in
#' @importFrom vctrs vec_match
#' @importFrom vctrs vec_ptype
#' @importFrom vctrs vec_ptype_finalise
#' @importFrom vctrs vec_ptype2
#' @importFrom vctrs vec_rbind
#' @importFrom vctrs vec_recycle_common
#' @importFrom vctrs vec_rep
Expand Down
39 changes: 29 additions & 10 deletions R/join.R
Original file line number Diff line number Diff line change
@@ -1,4 +1,13 @@
rel_join_impl <- function(x, y, by, join, na_matches, suffix, keep, error_call = caller_env()) {
rel_join_impl <- function(
x,
y,
by,
join,
na_matches,
suffix = c(".x", ".y"),
keep = NULL,
error_call = caller_env()
) {
mutating <- !(join %in% c("semi", "anti"))

if (mutating) {
Expand All @@ -23,6 +32,25 @@ rel_join_impl <- function(x, y, by, join, na_matches, suffix, keep, error_call =
y_rel <- duckdb_rel_from_df(y)
y_rel <- rel_set_alias(y_rel, "rhs")

# FIXME: Split join_cols, https://github.com/tidyverse/dplyr/issues/7050
vars <- join_cols(
x_names = x_names,
y_names = y_names,
by = by,
suffix = suffix,
keep = keep,
error_call = error_call
)

x_in <- vec_ptype(x)
y_in <- vec_ptype(y)

x_key <- set_names(x_in[vars$x$key], names(vars$x$key))
y_key <- set_names(y_in[vars$y$key], names(vars$x$key))

# Side effect: check join compatibility
join_ptype_common(x_key, y_key, vars, error_call = error_call)

# Rename if non-unique column names
if (mutating) {
if (length(intersect(x_names, y_names)) != 0) {
Expand Down Expand Up @@ -74,15 +102,6 @@ rel_join_impl <- function(x, y, by, join, na_matches, suffix, keep, error_call =
list(x_rel, y_rel)
)

vars <- join_cols(
x_names = x_names,
y_names = y_names,
by = by,
suffix = suffix,
keep = keep,
error_call = error_call
)

exprs <- c(
nexprs_from_loc(x_names_remap, vars$x$out),
nexprs_from_loc(y_names_remap, vars$y$out)
Expand Down
15 changes: 15 additions & 0 deletions R/join_ptype_common.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# https://github.com/tidyverse/dplyr/pull/7029

join_ptype_common <- function(x, y, vars, error_call = caller_env()) {
# Explicit `x/y_arg = ""` to avoid auto naming in `cnd$x_arg`
ptype <- try_fetch(
vec_ptype2(x, y, x_arg = "", y_arg = "", call = error_call),
vctrs_error_incompatible_type = function(cnd) {
rethrow_error_join_incompatible_type(cnd, vars, error_call)
}
)
# Finalize unspecified columns (#6804)
ptype <- vec_ptype_finalise(ptype)

ptype
}
11 changes: 11 additions & 0 deletions tests/testthat/_snaps/join.md
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,17 @@
i `x$a` is a <double>.
i `y$b` is a <character>.

# filtering joins reference original column in `y` when there are type errors (#6465)

Code
(expect_error(duckplyr_semi_join(x, y, by = join_by(a == b))))
Output
<error/dplyr_error_join_incompatible_type>
Error in `semi_join()`:
! Can't join `x$a` with `y$b` due to incompatible types.
i `x$a` is a <double>.
i `y$b` is a <character>.

# error if passed additional arguments

Code
Expand Down
2 changes: 0 additions & 2 deletions tests/testthat/test-join.R
Original file line number Diff line number Diff line change
Expand Up @@ -378,7 +378,6 @@ test_that("mutating joins don't trigger many-to-many warning when called indirec
})

test_that("mutating joins compute common columns", {
skip("TODO duckdb")
df1 <- tibble(x = c(1, 2), y = c(2, 3))
df2 <- tibble(x = c(1, 3), z = c(2, 3))
expect_snapshot(out <- duckplyr_left_join(df1, df2))
Expand Down Expand Up @@ -450,7 +449,6 @@ test_that("mutating joins reference original column in `y` when there are type e
})

test_that("filtering joins reference original column in `y` when there are type errors (#6465)", {
skip("TODO duckdb")
x <- tibble(a = 1)
y <- tibble(b = "1")

Expand Down
3 changes: 0 additions & 3 deletions tools/00-funs.R
Original file line number Diff line number Diff line change
Expand Up @@ -112,10 +112,7 @@ duckplyr_tests <- head(n = -1, list(
"mutating joins trigger multiple match warning",
"mutating joins don't trigger multiple match warning when called indirectly",

"filtering joins reference original column in `y` when there are type errors (#6465)",

"mutating joins trigger many-to-many warning",
"mutating joins compute common columns",
"mutating joins don't trigger many-to-many warning when called indirectly",
NULL
),
Expand Down

0 comments on commit 124163e

Please sign in to comment.