diff --git a/DESCRIPTION b/DESCRIPTION index d0a0a6fd..75db521f 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -13,7 +13,7 @@ Description: A drop-in replacement for 'dplyr', powered by 'DuckDB' for performa Also defines a set of generics that provide a low-level implementer's interface for the high-level user interface of 'dplyr'. License: MIT + file LICENSE -URL: https://tidyverse.github.io/duckplyr, https://github.com/tidyverse/duckplyr +URL: https://duckplyr.tidyverse.org, https://github.com/tidyverse/duckplyr BugReports: https://github.com/tidyverse/duckplyr/issues Depends: R (>= 4.1.0) diff --git a/README.Rmd b/README.Rmd index 5bee8243..91a07085 100644 --- a/README.Rmd +++ b/README.Rmd @@ -31,7 +31,7 @@ local({ Sys.setenv(DUCKPLYR_OUTPUT_ORDER = TRUE) ``` -# duckplyr +# duckplyr [![Lifecycle: experimental](https://img.shields.io/badge/lifecycle-experimental-orange.svg)](https://lifecycle.r-lib.org/articles/stages.html#experimental) diff --git a/README.md b/README.md index 8b163b69..25233f1f 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ -# duckplyr +# duckplyr @@ -41,8 +41,8 @@ Or from [GitHub](https://github.com/) with: There are two ways to use duckplyr. -1. To enable duckplyr for individual data frames, use [`duckplyr::as_duckplyr_tibble()`](https://tidyverse.github.io/duckplyr/reference/as_duckplyr_tibble.html) as the first step in your pipe, without attaching the package. -2. By calling [`library(duckplyr)`](https://tidyverse.github.io/duckplyr/), it overwrites dplyr methods and is automatically enabled for the entire session without having to call `as_duckplyr_tibble()`. To turn this off, call `methods_restore()`. +1. To enable duckplyr for individual data frames, use [`duckplyr::as_duckplyr_tibble()`](https://tidyverse.github.io/duckplyr/reference/as_duckplyr_df.html) as the first step in your pipe, without attaching the package. +2. 
By calling [`library(duckplyr)`](https://duckplyr.tidyverse.org), it overwrites dplyr methods and is automatically enabled for the entire session without having to call `as_duckplyr_tibble()`. To turn this off, call `methods_restore()`. The examples below illustrate both methods. See also the companion [demo repository](https://github.com/Tmonster/duckplyr_demo) for a use case with a large dataset. @@ -50,14 +50,14 @@ The examples below illustrate both methods. See also the companion [demo reposit This example illustrates usage of duckplyr for individual data frames. -Use [`duckplyr::as_duckplyr_tibble()`](https://tidyverse.github.io/duckplyr/reference/as_duckplyr_tibble.html) to enable processing with duckdb: +Use [`duckplyr::as_duckplyr_tibble()`](https://tidyverse.github.io/duckplyr/reference/as_duckplyr_df.html) to enable processing with duckdb:
 out <-
   palmerpenguins::penguins %>%
   # CAVEAT: factor columns are not supported yet
   mutate(across(where(is.factor), as.character)) %>%
-  duckplyr::as_duckplyr_tibble() %>%
+  duckplyr::as_duckplyr_tibble() %>%
   mutate(bill_area = bill_length_mm * bill_depth_mm) %>%
   summarize(.by = c(species, sex), mean_bill_area = mean(bill_area)) %>%
   filter(species != "Gentoo")
@@ -77,86 +77,100 @@ duckdb is responsible for eventually carrying out the operations. Despite the la explain() #> ┌───────────────────────────┐ #> │ ORDER_BY │ -#> │ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ │ -#> │ ORDERS: │ +#> │ ──────────────────── │ #> │ dataframe_42_42 │ #> │ 42.___row_number ASC │ -#> └─────────────┬─────────────┘ +#> └─────────────┬─────────────┘ #> ┌─────────────┴─────────────┐ #> │ FILTER │ -#> │ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ │ -#> │r_base::!=(species, 'Gentoo│ -#> │ ') │ -#> │ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ │ -#> │ EC: 34 │ -#> └─────────────┬─────────────┘ +#> │ ──────────────────── │ +#> │ "r_base::!="(species, │ +#> │ 'Gentoo') │ +#> │ │ +#> │ ~34 Rows │ +#> └─────────────┬─────────────┘ #> ┌─────────────┴─────────────┐ #> │ PROJECTION │ -#> │ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ │ +#> │ ──────────────────── │ #> │ #0 │ #> │ #1 │ #> │ #2 │ #> │ #3 │ -#> └─────────────┬─────────────┘ +#> │ │ +#> │ ~172 Rows │ +#> └─────────────┬─────────────┘ #> ┌─────────────┴─────────────┐ #> │ STREAMING_WINDOW │ -#> │ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ │ +#> │ ──────────────────── │ +#> │ Projections: │ #> │ ROW_NUMBER() OVER () │ -#> └─────────────┬─────────────┘ +#> └─────────────┬─────────────┘ #> ┌─────────────┴─────────────┐ #> │ ORDER_BY │ -#> │ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ │ -#> │ ORDERS: │ +#> │ ──────────────────── │ #> │ dataframe_42_42 │ #> │ 42.___row_number ASC │ -#> └─────────────┬─────────────┘ +#> └─────────────┬─────────────┘ #> ┌─────────────┴─────────────┐ #> │ HASH_GROUP_BY │ -#> │ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ │ +#> │ ──────────────────── │ +#> │ Groups: │ #> │ #0 │ #> │ #1 │ +#> │ │ +#> │ Aggregates: │ #> │ min(#2) │ #> │ mean(#3) │ -#> └─────────────┬─────────────┘ +#> │ │ +#> │ ~172 Rows │ +#> └─────────────┬─────────────┘ #> ┌─────────────┴─────────────┐ #> │ PROJECTION │ -#> │ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ │ +#> │ ──────────────────── │ #> │ species │ #> │ sex │ #> │ ___row_number │ #> │ bill_area │ -#> └─────────────┬─────────────┘ +#> │ │ +#> │ ~344 Rows │ +#> └─────────────┬─────────────┘ #> 
┌─────────────┴─────────────┐ #> │ PROJECTION │ -#> │ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ │ +#> │ ──────────────────── │ #> │ #0 │ #> │ #1 │ #> │ #2 │ #> │ #3 │ -#> └─────────────┬─────────────┘ +#> │ │ +#> │ ~344 Rows │ +#> └─────────────┬─────────────┘ #> ┌─────────────┴─────────────┐ #> │ STREAMING_WINDOW │ -#> │ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ │ +#> │ ──────────────────── │ +#> │ Projections: │ #> │ ROW_NUMBER() OVER () │ -#> └─────────────┬─────────────┘ +#> └─────────────┬─────────────┘ #> ┌─────────────┴─────────────┐ #> │ PROJECTION │ -#> │ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ │ +#> │ ──────────────────── │ #> │ species │ #> │ sex │ #> │ bill_area │ -#> └─────────────┬─────────────┘ +#> │ │ +#> │ ~344 Rows │ +#> └─────────────┬─────────────┘ #> ┌─────────────┴─────────────┐ #> │ R_DATAFRAME_SCAN │ -#> │ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ │ +#> │ ──────────────────── │ #> │ data.frame │ -#> │ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ │ +#> │ │ +#> │ Projections: │ #> │ species │ #> │ bill_length_mm │ #> │ bill_depth_mm │ #> │ sex │ -#> │ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ │ -#> │ EC: 344 │ +#> │ │ +#> │ ~344 Rows │ #> └───────────────────────────┘ All data frame operations are supported. Computation happens upon the first request. @@ -169,13 +183,13 @@ All data frame operations are supported. 
Computation happens upon the first requ #> --------------------- #> Projection [species as species, sex as sex, mean_bill_area as mean_bill_area] #> Order [___row_number ASC] -#> Filter [!=(species, 'Gentoo')] +#> Filter ["!="(species, 'Gentoo')] #> Projection [species as species, sex as sex, mean_bill_area as mean_bill_area, row_number() OVER () as ___row_number] #> Projection [species as species, sex as sex, mean_bill_area as mean_bill_area] #> Order [___row_number ASC] #> Aggregate [species, sex, min(___row_number), mean(bill_area)] #> Projection [species as species, island as island, bill_length_mm as bill_length_mm, bill_depth_mm as bill_depth_mm, flipper_length_mm as flipper_length_mm, body_mass_g as body_mass_g, sex as sex, "year" as year, bill_area as bill_area, row_number() OVER () as ___row_number] -#> Projection [species as species, island as island, bill_length_mm as bill_length_mm, bill_depth_mm as bill_depth_mm, flipper_length_mm as flipper_length_mm, body_mass_g as body_mass_g, sex as sex, "year" as year, *(bill_length_mm, bill_depth_mm) as bill_area] +#> Projection [species as species, island as island, bill_length_mm as bill_length_mm, bill_depth_mm as bill_depth_mm, flipper_length_mm as flipper_length_mm, body_mass_g as body_mass_g, sex as sex, "year" as year, "*"(bill_length_mm, bill_depth_mm) as bill_area] #> r_dataframe_scan(0xdeadbeef) #> #> --------------------- @@ -204,7 +218,7 @@ After the computation has been carried out, the results are available immediatel This example illustrates usage of duckplyr for all data frames in the R session. 
-Use [`library(duckplyr)`](https://tidyverse.github.io/duckplyr/) or [`duckplyr::methods_overwrite()`](https://tidyverse.github.io/duckplyr/reference/methods_overwrite.html) to overwrite dplyr methods and enable processing with duckdb for all data frames: +Use [`library(duckplyr)`](https://duckplyr.tidyverse.org) or [`duckplyr::methods_overwrite()`](https://tidyverse.github.io/duckplyr/reference/methods_overwrite.html) to overwrite dplyr methods and enable processing with duckdb for all data frames:
 duckplyr::methods_overwrite()
@@ -238,13 +252,13 @@ Querying the number of rows also starts the computation:
 #> ---------------------
 #> Projection [species as species, sex as sex, mean_bill_area as mean_bill_area]
 #>   Order [___row_number ASC]
-#>     Filter [!=(species, 'Gentoo')]
+#>     Filter ["!="(species, 'Gentoo')]
 #>       Projection [species as species, sex as sex, mean_bill_area as mean_bill_area, row_number() OVER () as ___row_number]
 #>         Projection [species as species, sex as sex, mean_bill_area as mean_bill_area]
 #>           Order [___row_number ASC]
 #>             Aggregate [species, sex, min(___row_number), mean(bill_area)]
 #>               Projection [species as species, island as island, bill_length_mm as bill_length_mm, bill_depth_mm as bill_depth_mm, flipper_length_mm as flipper_length_mm, body_mass_g as body_mass_g, sex as sex, "year" as year, bill_area as bill_area, row_number() OVER () as ___row_number]
-#>                 Projection [species as species, island as island, bill_length_mm as bill_length_mm, bill_depth_mm as bill_depth_mm, flipper_length_mm as flipper_length_mm, body_mass_g as body_mass_g, sex as sex, "year" as year, *(bill_length_mm, bill_depth_mm) as bill_area]
+#>                 Projection [species as species, island as island, bill_length_mm as bill_length_mm, bill_depth_mm as bill_depth_mm, flipper_length_mm as flipper_length_mm, body_mass_g as body_mass_g, sex as sex, "year" as year, "*"(bill_length_mm, bill_depth_mm) as bill_area]
 #>                   r_dataframe_scan(0xdeadbeef)
 #> 
 #> ---------------------
@@ -298,7 +312,7 @@ The first time the package encounters an unsupported function, data type, or ope
 
 
 palmerpenguins::penguins %>%
-  duckplyr::as_duckplyr_tibble() %>%
+  duckplyr::as_duckplyr_tibble() %>%
   transmute(bill_area = bill_length_mm * bill_depth_mm) %>%
   head(3)
 #> The duckplyr package is configured to fall back to dplyr when it encounters an
@@ -353,7 +367,7 @@ The dbplyr package is a dplyr backend that connects to SQL databases, and is des
 This package also provides generics, for which other packages may then implement methods.
 
 
-library(duckplyr)
+library(duckplyr)
 #> ✔ Overwriting dplyr methods with duckplyr methods.
 #> ℹ Turn off with `duckplyr::methods_restore()`.
diff --git a/man/figures/logo.png b/man/figures/logo.png new file mode 100644 index 00000000..fa3750d6 Binary files /dev/null and b/man/figures/logo.png differ