From b33b0d3d061fb5d0942caaf450e4473000fba4b2 Mon Sep 17 00:00:00 2001 From: James Wade Date: Fri, 2 Feb 2024 10:11:28 -0500 Subject: [PATCH 01/11] update snapshots --- tests/testthat/_snaps/measure_input_long.md | 12 ++--- tests/testthat/_snaps/measure_input_wide.md | 6 +-- tests/testthat/_snaps/measure_output_long.md | 4 +- tests/testthat/_snaps/measure_output_wide.md | 4 +- tests/testthat/_snaps/savitzky_golay.md | 48 ++++++++++---------- tests/testthat/helpers-data.R | 16 +++---- 6 files changed, 45 insertions(+), 45 deletions(-) diff --git a/tests/testthat/_snaps/measure_input_long.md b/tests/testthat/_snaps/measure_input_long.md index 3e1f939..e115358 100644 --- a/tests/testthat/_snaps/measure_input_long.md +++ b/tests/testthat/_snaps/measure_input_long.md @@ -2,7 +2,7 @@ Code print(rec_1) - Message + Message -- Recipe ---------------------------------------------------------------------- @@ -43,7 +43,7 @@ Code print(prep_1) - Message + Message -- Recipe ---------------------------------------------------------------------- @@ -87,7 +87,7 @@ Code recipe(water + fat + protein ~ absorp, data = na_train) %>% step_measure_input_long(absorp) %>% prep() - Error + Condition Error in `step_measure_input_long()`: Caused by error in `prep()`: ! 'location' is required for long input data @@ -97,7 +97,7 @@ Code recipe(water + fat + protein ~ ., data = miss_train) %>% step_measure_input_long(absorp, location = vars(ind)) %>% prep() - Error + Condition Error in `step_measure_input_long()`: Caused by error in `check_measure_dims()`: ! The number of rows in each measure should be the same. Most samples have 100 rows and these do not: 1. Please pad the input with missing values. @@ -107,7 +107,7 @@ Code recipe(water + fat + protein ~ ., data = na_train) %>% step_measure_input_long( dplyr::everything(), location = vars(ind)) %>% prep() - Error + Condition Error in `step_measure_input_long()`: Caused by error in `check_single_selector()`: ! The selection for `...` should only select a single column (6 columns were selected). @@ -117,7 +117,7 @@ Code recipe(water + fat + protein ~ ., data = na_train) %>% step_measure_input_long( absorp, location = vars(dplyr::everything())) %>% prep() - Error + Condition Error in `step_measure_input_long()`: Caused by error in `check_single_selector()`: ! The selection for `location` should only select a single column (6 columns were selected). diff --git a/tests/testthat/_snaps/measure_input_wide.md b/tests/testthat/_snaps/measure_input_wide.md index 5c84336..98d5e98 100644 --- a/tests/testthat/_snaps/measure_input_wide.md +++ b/tests/testthat/_snaps/measure_input_wide.md @@ -2,7 +2,7 @@ Code print(rec_1) - Message + Message -- Recipe ---------------------------------------------------------------------- @@ -48,7 +48,7 @@ Code print(prep_1) - Message + Message -- Recipe ---------------------------------------------------------------------- @@ -92,7 +92,7 @@ Code recipe(water + fat + protein ~ ., data = na_train) %>% step_measure_input_wide( x_001:x_100, location_values = 1:2) %>% prep() - Error + Condition Error in `step_measure_input_wide()`: Caused by error in `prep()`: ! 100 columns were selected as inputs but `location_values` has 2 values. diff --git a/tests/testthat/_snaps/measure_output_long.md b/tests/testthat/_snaps/measure_output_long.md index 639e464..a278c4e 100644 --- a/tests/testthat/_snaps/measure_output_long.md +++ b/tests/testthat/_snaps/measure_output_long.md @@ -2,7 +2,7 @@ Code print(rec_1) - Message + Message -- Recipe ---------------------------------------------------------------------- @@ -45,7 +45,7 @@ Code print(prep_1) - Message + Message -- Recipe ---------------------------------------------------------------------- diff --git a/tests/testthat/_snaps/measure_output_wide.md b/tests/testthat/_snaps/measure_output_wide.md index eb6c68c..c6d6f84 100644 --- a/tests/testthat/_snaps/measure_output_wide.md +++ b/tests/testthat/_snaps/measure_output_wide.md @@ -2,7 +2,7 @@ Code print(rec_1) - Message + Message -- Recipe ---------------------------------------------------------------------- @@ -45,7 +45,7 @@ Code print(prep_1) - Message + Message -- Recipe ---------------------------------------------------------------------- diff --git a/tests/testthat/_snaps/savitzky_golay.md b/tests/testthat/_snaps/savitzky_golay.md index cf647a4..b664c7c 100644 --- a/tests/testthat/_snaps/savitzky_golay.md +++ b/tests/testthat/_snaps/savitzky_golay.md @@ -47,7 +47,7 @@ Code rec %>% step_measure_savitzky_golay(differentiation_order = bad_inputs$diffs[i], window_size = bad_inputs$wn[i], degree = bad_inputs$deg[i]) %>% prep() - Error + Condition Error in `step_measure_savitzky_golay()`: Caused by error in `prep()`: ! The `window_size` argument to `step_measure_savitzky_golay()` should be a single odd integer greater than 0. @@ -57,7 +57,7 @@ Code rec %>% step_measure_savitzky_golay(differentiation_order = bad_inputs$diffs[i], window_size = bad_inputs$wn[i], degree = bad_inputs$deg[i]) %>% prep() - Error + Condition Error in `step_measure_savitzky_golay()`: Caused by error in `prep()`: ! The `window_size` argument to `step_measure_savitzky_golay()` should be a single odd integer greater than 0. @@ -67,7 +67,7 @@ Code rec %>% step_measure_savitzky_golay(differentiation_order = bad_inputs$diffs[i], window_size = bad_inputs$wn[i], degree = bad_inputs$deg[i]) %>% prep() - Error + Condition Error in `step_measure_savitzky_golay()`: Caused by error in `prep()`: ! The `window_size` argument to `step_measure_savitzky_golay()` should be a single odd integer greater than 0. @@ -77,7 +77,7 @@ Code rec %>% step_measure_savitzky_golay(differentiation_order = bad_inputs$diffs[i], window_size = bad_inputs$wn[i], degree = bad_inputs$deg[i]) %>% prep() - Error + Condition Error in `step_measure_savitzky_golay()`: Caused by error in `prep()`: ! The `window_size` argument to `step_measure_savitzky_golay()` should be a single odd integer greater than 0. @@ -87,7 +87,7 @@ Code rec %>% step_measure_savitzky_golay(differentiation_order = bad_inputs$diffs[i], window_size = bad_inputs$wn[i], degree = bad_inputs$deg[i]) %>% prep() - Error + Condition Error in `step_measure_savitzky_golay()`: Caused by error in `prep()`: ! The `window_size` argument to `step_measure_savitzky_golay()` should be a single odd integer greater than 0. @@ -97,7 +97,7 @@ Code rec %>% step_measure_savitzky_golay(differentiation_order = bad_inputs$diffs[i], window_size = bad_inputs$wn[i], degree = bad_inputs$deg[i]) %>% prep() - Error + Condition Error in `step_measure_savitzky_golay()`: Caused by error in `prep()`: ! The `window_size` argument to `step_measure_savitzky_golay()` should be a single odd integer greater than 0. @@ -107,7 +107,7 @@ Code rec %>% step_measure_savitzky_golay(differentiation_order = bad_inputs$diffs[i], window_size = bad_inputs$wn[i], degree = bad_inputs$deg[i]) %>% prep() - Error + Condition Error in `step_measure_savitzky_golay()`: Caused by error in `prep()`: ! The `window_size` argument to `step_measure_savitzky_golay()` should be a single odd integer greater than 0. @@ -117,7 +117,7 @@ Code rec %>% step_measure_savitzky_golay(differentiation_order = bad_inputs$diffs[i], window_size = bad_inputs$wn[i], degree = bad_inputs$deg[i]) %>% prep() - Error + Condition Error in `step_measure_savitzky_golay()`: Caused by error in `prep()`: ! The `window_size` argument to `step_measure_savitzky_golay()` should be a single odd integer greater than 0. @@ -127,7 +127,7 @@ Code rec %>% step_measure_savitzky_golay(differentiation_order = bad_inputs$diffs[i], window_size = bad_inputs$wn[i], degree = bad_inputs$deg[i]) %>% prep() - Error + Condition Error in `step_measure_savitzky_golay()`: Caused by error in `prep()`: ! The `window_size` argument to `step_measure_savitzky_golay()` should be a single odd integer greater than 0. @@ -137,7 +137,7 @@ Code rec %>% step_measure_savitzky_golay(differentiation_order = bad_inputs$diffs[i], window_size = bad_inputs$wn[i], degree = bad_inputs$deg[i]) %>% prep() - Error + Condition Error in `step_measure_savitzky_golay()`: Caused by error in `prep()`: ! The `window_size` argument to `step_measure_savitzky_golay()` should be a single odd integer greater than 0. @@ -147,7 +147,7 @@ Code rec %>% step_measure_savitzky_golay(differentiation_order = bad_inputs$diffs[i], window_size = bad_inputs$wn[i], degree = bad_inputs$deg[i]) %>% prep() - Error + Condition Error in `step_measure_savitzky_golay()`: Caused by error in `prep()`: ! The `window_size` argument to `step_measure_savitzky_golay()` should be a single odd integer greater than 0. @@ -157,7 +157,7 @@ Code rec %>% step_measure_savitzky_golay(differentiation_order = bad_inputs$diffs[i], window_size = bad_inputs$wn[i], degree = bad_inputs$deg[i]) %>% prep() - Error + Condition Error in `step_measure_savitzky_golay()`: Caused by error in `prep()`: ! The `window_size` argument to `step_measure_savitzky_golay()` should be a single odd integer greater than 0. @@ -167,7 +167,7 @@ Code rec %>% step_measure_savitzky_golay(differentiation_order = bad_inputs$diffs[i], window_size = bad_inputs$wn[i], degree = bad_inputs$deg[i]) %>% prep() - Error + Condition Error in `step_measure_savitzky_golay()`: Caused by error in `prep()`: ! The `window_size` argument to `step_measure_savitzky_golay()` should be a single odd integer greater than 0. @@ -177,7 +177,7 @@ Code rec %>% step_measure_savitzky_golay(differentiation_order = bad_inputs$diffs[i], window_size = bad_inputs$wn[i], degree = bad_inputs$deg[i]) %>% prep() - Error + Condition Error in `step_measure_savitzky_golay()`: Caused by error in `prep()`: ! The `window_size` argument to `step_measure_savitzky_golay()` should be a single odd integer greater than 0. @@ -187,7 +187,7 @@ Code rec %>% step_measure_savitzky_golay(differentiation_order = bad_inputs$diffs[i], window_size = bad_inputs$wn[i], degree = bad_inputs$deg[i]) %>% prep() - Error + Condition Error in `step_measure_savitzky_golay()`: Caused by error in `prep()`: ! The `window_size` argument to `step_measure_savitzky_golay()` should be a single odd integer greater than 0. @@ -197,7 +197,7 @@ Code rec %>% step_measure_savitzky_golay(differentiation_order = bad_inputs$diffs[i], window_size = bad_inputs$wn[i], degree = bad_inputs$deg[i]) %>% prep() - Error + Condition Error in `step_measure_savitzky_golay()`: Caused by error in `prep()`: ! The `window_size` argument to `step_measure_savitzky_golay()` should be a single odd integer greater than 0. @@ -207,7 +207,7 @@ Code rec %>% step_measure_savitzky_golay(differentiation_order = bad_inputs$diffs[i], window_size = bad_inputs$wn[i], degree = bad_inputs$deg[i]) %>% prep() - Error + Condition Error in `step_measure_savitzky_golay()`: Caused by error in `prep()`: ! The `window_size` argument to `step_measure_savitzky_golay()` should be a single odd integer greater than 0. @@ -217,7 +217,7 @@ Code rec %>% step_measure_savitzky_golay(differentiation_order = bad_inputs$diffs[i], window_size = bad_inputs$wn[i], degree = bad_inputs$deg[i]) %>% prep() - Error + Condition Error in `step_measure_savitzky_golay()`: Caused by error in `prep()`: ! The `window_size` argument to `step_measure_savitzky_golay()` should be a single odd integer greater than 0. @@ -227,7 +227,7 @@ Code rec %>% step_measure_savitzky_golay(differentiation_order = bad_inputs$diffs[i], window_size = bad_inputs$wn[i], degree = bad_inputs$deg[i]) %>% prep() - Error + Condition Error in `step_measure_savitzky_golay()`: Caused by error in `prep()`: ! The `window_size` argument to `step_measure_savitzky_golay()` should be a single odd integer greater than 0. @@ -237,7 +237,7 @@ Code rec %>% step_measure_savitzky_golay(differentiation_order = bad_inputs$diffs[i], window_size = bad_inputs$wn[i], degree = bad_inputs$deg[i]) %>% prep() - Error + Condition Error in `step_measure_savitzky_golay()`: Caused by error in `prep()`: ! The `window_size` argument to `step_measure_savitzky_golay()` should be a single odd integer greater than 0. @@ -247,7 +247,7 @@ Code rec %>% step_measure_savitzky_golay(differentiation_order = bad_inputs$diffs[i], window_size = bad_inputs$wn[i], degree = bad_inputs$deg[i]) %>% prep() - Error + Condition Error in `step_measure_savitzky_golay()`: Caused by error in `prep()`: ! The `window_size` argument to `step_measure_savitzky_golay()` should be a single odd integer greater than 0. @@ -257,7 +257,7 @@ Code rec %>% step_measure_savitzky_golay(differentiation_order = bad_inputs$diffs[i], window_size = bad_inputs$wn[i], degree = bad_inputs$deg[i]) %>% prep() - Error + Condition Error in `step_measure_savitzky_golay()`: Caused by error in `prep()`: ! The `window_size` argument to `step_measure_savitzky_golay()` should be a single odd integer greater than 0. @@ -267,7 +267,7 @@ Code rec %>% step_measure_savitzky_golay(differentiation_order = bad_inputs$diffs[i], window_size = bad_inputs$wn[i], degree = bad_inputs$deg[i]) %>% prep() - Error + Condition Error in `step_measure_savitzky_golay()`: Caused by error in `prep()`: ! The `window_size` argument to `step_measure_savitzky_golay()` should be a single odd integer greater than 0. @@ -277,7 +277,7 @@ Code rec %>% step_measure_savitzky_golay(differentiation_order = bad_inputs$diffs[i], window_size = bad_inputs$wn[i], degree = bad_inputs$deg[i]) %>% prep() - Error + Condition Error in `step_measure_savitzky_golay()`: Caused by error in `prep()`: ! The `window_size` argument to `step_measure_savitzky_golay()` should be a single odd integer greater than 0. diff --git a/tests/testthat/helpers-data.R b/tests/testthat/helpers-data.R index 4aabead..69c1c6f 100644 --- a/tests/testthat/helpers-data.R +++ b/tests/testthat/helpers-data.R @@ -11,20 +11,20 @@ data_meat_long <- function() { meats_2 <- meats %>% mutate(.sample_num = row_number()) %>% - tidyr::pivot_longer( + pivot_longer( cols = c(x_001:x_100), names_to = "temp", values_to = "absorp" ) %>% - dplyr::full_join(inds, by = "temp") %>% - dplyr:: select(-temp) + full_join(inds, by = "temp") %>% + select(-temp) inds <- tibble(temp = grep("^x_", names(meats), value = TRUE), ind = seq(1, 17, length.out = 100)) list( - train = meats_2 %>% dplyr::filter(.sample_num <= 200), - test = meats_2 %>% dplyr::filter(.sample_num > 200) + train = meats_2 %>% filter(.sample_num <= 200), + test = meats_2 %>% filter(.sample_num > 200) ) } @@ -38,11 +38,11 @@ data_meat_wide <- function() { meats_2 <- meats %>% mutate(.sample_num = row_number()) %>% - dplyr::relocate(.sample_num) + relocate(.sample_num) list( - train = meats_2 %>% dplyr::filter(.sample_num <= 200), - test = meats_2 %>% dplyr::filter(.sample_num > 200), + train = meats_2 %>% filter(.sample_num <= 200), + test = meats_2 %>% filter(.sample_num > 200), ind = seq(1, 17, length.out = 100) ) } From 9d88ce3ab291b288feb3c6b8e54830d445d93893 Mon Sep 17 00:00:00 2001 From: James Wade Date: Fri, 2 Feb 2024 10:12:56 -0500 Subject: [PATCH 02/11] update roxygen --- DESCRIPTION | 2 +- tests/testthat/helpers-data.R | 51 ----------------------------------- 2 files changed, 1 insertion(+), 52 deletions(-) delete mode 100644 tests/testthat/helpers-data.R diff --git a/DESCRIPTION b/DESCRIPTION index fc96346..c1f5de7 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -39,4 +39,4 @@ Config/testthat/edition: 3 Encoding: UTF-8 LazyData: true Roxygen: list(markdown = TRUE) -RoxygenNote: 7.2.3.9000 +RoxygenNote: 7.3.1 diff --git a/tests/testthat/helpers-data.R b/tests/testthat/helpers-data.R deleted file mode 100644 index 69c1c6f..0000000 --- a/tests/testthat/helpers-data.R +++ /dev/null @@ -1,51 +0,0 @@ -data_meat_long <- function() { - skip_if_not_installed("modeldata") - data(meats, package = "modeldata") - - library(tidyr) - library(dplyr) - - inds <- tibble(temp = grep("^x_", names(meats), value = TRUE), - ind = seq(1, 17, length.out = 100)) - - meats_2 <- - meats %>% - mutate(.sample_num = row_number()) %>% - pivot_longer( - cols = c(x_001:x_100), - names_to = "temp", - values_to = "absorp" - ) %>% - full_join(inds, by = "temp") %>% - select(-temp) - - inds <- tibble(temp = grep("^x_", names(meats), value = TRUE), - ind = seq(1, 17, length.out = 100)) - - list( - train = meats_2 %>% filter(.sample_num <= 200), - test = meats_2 %>% filter(.sample_num > 200) - ) -} - -data_meat_wide <- function() { - skip_if_not_installed("modeldata") - data(meats, package = "modeldata") - - library(tidyr) - library(dplyr) - - meats_2 <- - meats %>% - mutate(.sample_num = row_number()) %>% - relocate(.sample_num) - - list( - train = meats_2 %>% filter(.sample_num <= 200), - test = meats_2 %>% filter(.sample_num > 200), - ind = seq(1, 17, length.out = 100) - ) -} - - - From c6b36b4c007f2abbeae100afe638231e11e41369 Mon Sep 17 00:00:00 2001 From: James Wade Date: Fri, 2 Feb 2024 10:13:23 -0500 Subject: [PATCH 03/11] chore: remove explicity namespace --- tests/testthat/helpers-data.R | 51 +++++++++++++++++++++++++++++++++++ 1 file changed, 51 insertions(+) create mode 100644 tests/testthat/helpers-data.R diff --git a/tests/testthat/helpers-data.R b/tests/testthat/helpers-data.R new file mode 100644 index 0000000..69c1c6f --- /dev/null +++ b/tests/testthat/helpers-data.R @@ -0,0 +1,51 @@ +data_meat_long <- function() { + skip_if_not_installed("modeldata") + data(meats, package = "modeldata") + + library(tidyr) + library(dplyr) + + inds <- tibble(temp = grep("^x_", names(meats), value = TRUE), + ind = seq(1, 17, length.out = 100)) + + meats_2 <- + meats %>% + mutate(.sample_num = row_number()) %>% + pivot_longer( + cols = c(x_001:x_100), + names_to = "temp", + values_to = "absorp" + ) %>% + full_join(inds, by = "temp") %>% + select(-temp) + + inds <- tibble(temp = grep("^x_", names(meats), value = TRUE), + ind = seq(1, 17, length.out = 100)) + + list( + train = meats_2 %>% filter(.sample_num <= 200), + test = meats_2 %>% filter(.sample_num > 200) + ) +} + +data_meat_wide <- function() { + skip_if_not_installed("modeldata") + data(meats, package = "modeldata") + + library(tidyr) + library(dplyr) + + meats_2 <- + meats %>% + mutate(.sample_num = row_number()) %>% + relocate(.sample_num) + + list( + train = meats_2 %>% filter(.sample_num <= 200), + test = meats_2 %>% filter(.sample_num > 200), + ind = seq(1, 17, length.out = 100) + ) +} + + + From dfb917b564a61826ef43269ce0b2b6e75efc8a1f Mon Sep 17 00:00:00 2001 From: James Wade Date: Fri, 2 Feb 2024 10:38:04 -0500 Subject: [PATCH 04/11] remove unused na_rm arg --- R/input_long.R | 2 +- R/input_wide.R | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/R/input_long.R b/R/input_long.R index cf1b354..5135f41 100644 --- a/R/input_long.R +++ b/R/input_long.R @@ -71,7 +71,7 @@ step_measure_input_long <- } step_measure_input_long_new <- - function(terms, role, trained, columns, location, na_rm, skip, id) { + function(terms, role, trained, columns, location, skip, id) { step( subclass = "measure_input_long", terms = terms, diff --git a/R/input_wide.R b/R/input_wide.R index 3ee49a0..1246029 100644 --- a/R/input_wide.R +++ b/R/input_wide.R @@ -89,7 +89,7 @@ step_measure_input_wide <- } step_measure_input_wide_new <- - function(terms, role, trained, columns, location_values, na_rm, skip, id) { + function(terms, role, trained, columns, location_values, skip, id) { step( subclass = "measure_input_wide", terms = terms, From 4f7c912b6b887fb6a8704b6fd94b16484663b042 Mon Sep 17 00:00:00 2001 From: James Wade Date: Fri, 2 Feb 2024 10:38:15 -0500 Subject: [PATCH 05/11] change filename --- R/{meats.R => data-meats.R} | 0 man/meats_long.Rd | 2 +- 2 files changed, 1 insertion(+), 1 deletion(-) rename R/{meats.R => data-meats.R} (100%) diff --git a/R/meats.R b/R/data-meats.R similarity index 100% rename from R/meats.R rename to R/data-meats.R diff --git a/man/meats_long.Rd b/man/meats_long.Rd index a0cbec3..7b0ea81 100644 --- a/man/meats_long.Rd +++ b/man/meats_long.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/meats.R +% Please edit documentation in R/data-meats.R \docType{data} \name{meats_long} \alias{meats_long} From 65b511efb4d90ef0bacbbca974eb2d637ecd193b Mon Sep 17 00:00:00 2001 From: James Wade Date: Fri, 2 Feb 2024 10:42:39 -0500 Subject: [PATCH 06/11] ci: update workflows --- .github/workflows/R-CMD-check.yaml | 15 +-------------- .github/workflows/pr-commands.yaml | 4 ++-- .github/workflows/test-coverage.yaml | 6 +++--- 3 files changed, 6 insertions(+), 19 deletions(-) diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml index ee65ccb..74d8c97 100644 --- a/.github/workflows/R-CMD-check.yaml +++ b/.github/workflows/R-CMD-check.yaml @@ -1,9 +1,5 @@ # Workflow derived from https://github.com/r-lib/actions/tree/v2/examples # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help -# -# NOTE: This workflow is overkill for most R packages and -# check-standard.yaml is likely a better choice. -# usethis::use_github_action("check-standard") will install it. on: push: branches: [main, master] @@ -23,26 +19,17 @@ jobs: matrix: config: - {os: macos-latest, r: 'release'} - - {os: windows-latest, r: 'release'} - # Use 3.6 to trigger usage of RTools35 - - {os: windows-latest, r: '3.6'} - # use 4.1 to check with rtools40's older compiler - - {os: windows-latest, r: '4.1'} - - {os: ubuntu-latest, r: 'devel', http-user-agent: 'release'} - {os: ubuntu-latest, r: 'release'} - {os: ubuntu-latest, r: 'oldrel-1'} - - {os: ubuntu-latest, r: 'oldrel-2'} - - {os: ubuntu-latest, r: 'oldrel-3'} - - {os: ubuntu-latest, r: 'oldrel-4'} env: GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} R_KEEP_PKG_SOURCE: yes steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - uses: r-lib/actions/setup-pandoc@v2 diff --git a/.github/workflows/pr-commands.yaml b/.github/workflows/pr-commands.yaml index 71f335b..eea58c5 100644 --- a/.github/workflows/pr-commands.yaml +++ b/.github/workflows/pr-commands.yaml @@ -14,7 +14,7 @@ jobs: env: GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - uses: r-lib/actions/pr-fetch@v2 with: @@ -51,7 +51,7 @@ jobs: env: GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - uses: r-lib/actions/pr-fetch@v2 with: diff --git a/.github/workflows/test-coverage.yaml b/.github/workflows/test-coverage.yaml index 2c5bb50..960234c 100644 --- a/.github/workflows/test-coverage.yaml +++ b/.github/workflows/test-coverage.yaml @@ -15,7 +15,7 @@ jobs: GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - uses: r-lib/actions/setup-r@v2 with: @@ -31,7 +31,7 @@ jobs: covr::codecov( quiet = FALSE, clean = FALSE, - install_path = file.path(Sys.getenv("RUNNER_TEMP"), "package") + install_path = file.path(normalizePath(Sys.getenv("RUNNER_TEMP"), winslash = "/"), "package") ) shell: Rscript {0} @@ -44,7 +44,7 @@ jobs: - name: Upload test results if: failure() - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 with: name: coverage-test-failures path: ${{ runner.temp }}/package From 8a2d31ae258a4c0d25f599b20fe3d055f7d7bc6c Mon Sep 17 00:00:00 2001 From: James Wade Date: Fri, 2 Feb 2024 14:00:12 -0500 Subject: [PATCH 07/11] feat: measures padding helper function for missing data --- R/checks.R | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/R/checks.R b/R/checks.R index 97f4e4c..e9e95d3 100644 --- a/R/checks.R +++ b/R/checks.R @@ -31,3 +31,30 @@ check_measure_dims <- function(x) { } invisible(NULL) } + +pad_measure_dims <- function(x) { + # Determine the most frequent number of rows + num_rows <- purrr::map_int(x$.measures, nrow) + num_unique <- sort(table(num_rows), decreasing = TRUE) + most_freq <- as.integer(names(num_unique)[1]) + + # Pad each measure so they all have 'most_freq' rows + x$.measures <- purrr::map(x$.measures, ~{ + df <- .x + if (nrow(df) < most_freq) { + # Calculate how many rows to add + rows_to_add <- most_freq - nrow(df) + # Create a data frame with the required number of missing rows + missing_rows <- + purrr::map_dfc(names(df), ~rep(NA_real_, rows_to_add)) %>% + tibble::as_tibble() %>% + setNames(names(df)) %>% + suppressMessages() # suppress message about new column names + # Bind the missing rows to the original data frame + df <- bind_rows(df, missing_rows) + df + } + df + }) + x +} From e576acc62d420644876dacdc70268719789490db Mon Sep 17 00:00:00 2001 From: James Wade Date: Fri, 2 Feb 2024 14:01:24 -0500 Subject: [PATCH 08/11] feat: add pad argument to input_long and remove NA 'values' from output_wide --- R/input_long.R | 24 +++++++++++++++++------- R/output_wide.R | 2 ++ man/step_measure_input_long.Rd | 9 +++++++-- 3 files changed, 26 insertions(+), 9 deletions(-) diff --git a/R/input_long.R b/R/input_long.R index 5135f41..d7cd715 100644 --- a/R/input_long.R +++ b/R/input_long.R @@ -7,14 +7,16 @@ #' @family input/output steps #' @inheritParams recipes::step_center #' @param ... One or more selector functions to choose which _single_ column -#' contains the analytical measurements. The selection should be in the order -#' of the measurement's profile. +#' contains the analytical measurements. The selection should be in the order +#' of the measurement's profile. #' @param location One or more selector functions to choose which _single_ -#' column has the locations of the analytical values. +#' column has the locations of the analytical values. +#' @param pad Whether to pad the measurements to ensure that they all have the +#' same number of values. This is useful when there are missing values in the +#' measurements. #' @param columns A character vector of column names determined by the recipe. -#' @details -#' This step is designed for data in a format where there is a column for the -#' analytical measurement (e.g., absorption, etc.) and another with the +#' @details This step is designed for data in a format where there is a column +#' for the analytical measurement (e.g., absorption, etc.) and another with the #' location of the value (e.g., wave number, etc.). #' #' `step_measure_input_long()` will collect those data and put them into a @@ -48,6 +50,7 @@ step_measure_input_long <- function(recipe, ..., location, + pad = FALSE, role = "measure", trained = FALSE, columns = NULL, @@ -64,6 +67,7 @@ step_measure_input_long <- role = role, columns = columns, location = location, + pad = pad, skip = skip, id = id ) @@ -71,7 +75,7 @@ step_measure_input_long <- } step_measure_input_long_new <- - function(terms, role, trained, columns, location, skip, id) { + function(terms, role, trained, columns, location, pad, skip, id) { step( subclass = "measure_input_long", terms = terms, @@ -79,6 +83,7 @@ step_measure_input_long_new <- trained = trained, columns = columns, location = location, + pad = pad, skip = skip, id = id ) @@ -104,6 +109,7 @@ prep.step_measure_input_long <- function(x, training, info = NULL, ...) { trained = TRUE, columns = unname(c(value_name, loc_name)), location = x$location, + pad = x$pad, skip = x$skip, id = x$id ) @@ -125,6 +131,10 @@ bake.step_measure_input_long <- function(object, new_data, ...) { tidyr::nest(.by = c(-value), .key = ".measures") } + if (rlang::is_true(object$pad)) { + new_data <- pad_measure_dims(new_data) + } + check_measure_dims(new_data) new_data diff --git a/R/output_wide.R b/R/output_wide.R index 178d9f6..c5f022e 100644 --- a/R/output_wide.R +++ b/R/output_wide.R @@ -95,6 +95,8 @@ bake.step_measure_output_wide <- function(object, new_data, ...) { new_data %>% tidyr::unnest(cols = c(.measures)) %>% dplyr::mutate(location = gsub(" ", "0", format(location))) %>% + # remove NA values that are introduced by padding + tidyr::drop_na("value") %>% tidyr::pivot_wider( id_cols = c(dplyr::all_of(non_meas)), names_from = "location", diff --git a/man/step_measure_input_long.Rd b/man/step_measure_input_long.Rd index 7f38f6a..ee77f4e 100644 --- a/man/step_measure_input_long.Rd +++ b/man/step_measure_input_long.Rd @@ -8,6 +8,7 @@ step_measure_input_long( recipe, ..., location, + pad = FALSE, role = "measure", trained = FALSE, columns = NULL, @@ -26,6 +27,10 @@ of the measurement's profile.} \item{location}{One or more selector functions to choose which \emph{single} column has the locations of the analytical values.} +\item{pad}{Whether to pad the measurements to ensure that they all have the +same number of values. This is useful when there are missing values in the +measurements.} + \item{role}{Not used by this step since no new variables are created.} @@ -50,8 +55,8 @@ step that converts measures organized in a column for the analytical results the package. } \details{ -This step is designed for data in a format where there is a column for the -analytical measurement (e.g., absorption, etc.) and another with the +This step is designed for data in a format where there is a column +for the analytical measurement (e.g., absorption, etc.) and another with the location of the value (e.g., wave number, etc.). \code{step_measure_input_long()} will collect those data and put them into a From 3f691192ad36ce25f25b2ca10d2f98edc1ed696f Mon Sep 17 00:00:00 2001 From: James Wade Date: Fri, 2 Feb 2024 14:03:41 -0500 Subject: [PATCH 09/11] [test] add padding to tests and update snapshots --- tests/testthat/_snaps/measure_output_long.md | 16 ++++++++++++++++ tests/testthat/_snaps/measure_output_wide.md | 14 ++++++++++++++ tests/testthat/test_measure_output_long.R | 16 +++++++++++++--- tests/testthat/test_measure_output_wide.R | 13 ++++++++++++- 4 files changed, 55 insertions(+), 4 deletions(-) diff --git a/tests/testthat/_snaps/measure_output_long.md b/tests/testthat/_snaps/measure_output_long.md index a278c4e..c5acf98 100644 --- a/tests/testthat/_snaps/measure_output_long.md +++ b/tests/testthat/_snaps/measure_output_long.md @@ -88,3 +88,19 @@ 1 1 step measure_input_long TRUE FALSE potato 2 2 step measure_output_long TRUE FALSE turnip +--- + + Error in `step_measure_input_long()`: + Caused by error in `check_measure_dims()`: + ! The number of rows in each measure should be the same. Most samples have 2 rows and these do not: 1. Please pad the input with missing values. + +--- + + Code + recipe(water + fat + protein ~ ., data = meats_train) %>% + step_measure_output_long() %>% prep() + Condition + Error in `step_measure_output_long()`: + Caused by error in `check_has_measure()`: + ! It appears that the measurements have not been converted for the inernal format. See `step_measure_input_long()` and `step_measure_input_wide()` and use these prior to `step_measure_output_long()`. + diff --git a/tests/testthat/_snaps/measure_output_wide.md b/tests/testthat/_snaps/measure_output_wide.md index c6d6f84..314d1a9 100644 --- a/tests/testthat/_snaps/measure_output_wide.md +++ b/tests/testthat/_snaps/measure_output_wide.md @@ -88,3 +88,17 @@ 1 1 step measure_input_long TRUE FALSE potato 2 2 step measure_output_wide TRUE FALSE turnip +--- + + object 'miss_train_padded' not found + +--- + + Code + recipe(water + fat + protein ~ ., data = meats_train) %>% + step_measure_output_wide() %>% prep() + Condition + Error in `step_measure_output_wide()`: + Caused by error in `check_has_measure()`: + ! It appears that the measurements have not been converted for the inernal format. See `step_measure_input_long()` and `step_measure_input_wide()` and use these prior to `step_measure_output_wide()`. + diff --git a/tests/testthat/test_measure_output_long.R b/tests/testthat/test_measure_output_long.R index 4e525d9..c464712 100644 --- a/tests/testthat/test_measure_output_long.R +++ b/tests/testthat/test_measure_output_long.R @@ -45,12 +45,22 @@ test_that("output long format data", { ### missing rows - bake_2 <- + expect_snapshot_error( recipe(water + fat + protein ~ ., data = miss_train) %>% - step_measure_input_long(absorp, location = vars(ind)) %>% + step_measure_input_long(absorp, location = vars(ind)) %>% + step_measure_output_long("rstudio", "posit", id = "turnip") %>% + prep() %>% + bake(new_data = NULL) + ) + + ### missing rows with padding + + bake_2 <- recipe(water + fat + protein ~ ., data = miss_train) %>% + step_measure_input_long(absorp, location = vars(ind), pad = TRUE) %>% step_measure_output_long("rstudio", "posit", id = "turnip") %>% prep() %>% bake(new_data = NULL) + dat_ptype <- tibble::tibble( .sample_num = integer(0), @@ -61,7 +71,7 @@ test_that("output long format data", { posit = numeric(0) ) expect_equal(bake_2[0,], dat_ptype) - expect_equal(nrow(bake_2), 399L) + expect_equal(nrow(bake_2), 400L) ## missing values diff --git a/tests/testthat/test_measure_output_wide.R b/tests/testthat/test_measure_output_wide.R index c4967d6..2754cb2 100644 --- a/tests/testthat/test_measure_output_wide.R +++ b/tests/testthat/test_measure_output_wide.R @@ -45,12 +45,23 @@ test_that("output wide format data", { ### missing rows + expect_snapshot_error( + recipe(water + fat + protein ~ ., data = miss_train_padded) %>% + step_measure_input_long(absorp, location = vars(ind)) %>% + step_measure_output_wide(id = "turnip") %>% + prep() %>% + bake(new_data = NULL) + ) + + ### missing rows with padding + bake_2 <- recipe(water + fat + protein ~ ., data = miss_train) %>% - step_measure_input_long(absorp, location = vars(ind)) %>% + step_measure_input_long(absorp, location = vars(ind), pad = TRUE) %>% step_measure_output_wide(id = "turnip") %>% prep() %>% bake(new_data = NULL) + # one NA value from padding takes complete cases from 200L -> 199L expect_equal(sum(complete.cases(bake_2)), 199L) expect_true(is.na(bake_2$measure_1.161616[1])) From 20ca58760330878d10eef300976e92a96c865eb1 Mon Sep 17 00:00:00 2001 From: James Wade Date: Fri, 2 Feb 2024 14:38:19 -0500 Subject: [PATCH 10/11] [chore] styler --- R/baseline.R | 13 +-- R/checks.R | 10 +- R/helpers.R | 12 ++- R/input_long.R | 14 +-- R/input_wide.R | 59 ++++++----- R/output_long.R | 26 +++-- R/output_wide.R | 20 ++-- R/savitzky_golay.R | 77 +++++++------- tests/testthat/helpers-data.R | 21 ++-- tests/testthat/test-data-transformations.R | 6 -- tests/testthat/test_measure_input_long.R | 8 +- tests/testthat/test_measure_input_wide.R | 7 +- tests/testthat/test_measure_output_long.R | 11 +- tests/testthat/test_measure_output_wide.R | 8 +- tests/testthat/test_savitzky_golay.R | 111 ++++++++++----------- vignettes/baseline.Rmd | 28 +++--- vignettes/recipes.Rmd | 2 +- 17 files changed, 223 insertions(+), 210 deletions(-) diff --git a/R/baseline.R b/R/baseline.R index 57a12bf..a9f3191 100644 --- a/R/baseline.R +++ b/R/baseline.R @@ -70,17 +70,18 @@ print.step_baseline <- #' #' @examples #' meats_long %>% subtract_rf_baseline(yvar = transmittance) -subtract_rf_baseline <- function(data, yvar, span = 2/3, maxit = c(5, 5)){ - +subtract_rf_baseline <- function(data, yvar, span = 2 / 3, maxit = c(5, 5)) { # rlang::arg_match0(as.character(rlang::enquo(yvar)), values = names(data)) data %>% dplyr::mutate( raw = {{ yvar }}, - baseline = IDPmisc::rfbaseline(x = 1:length({{ yvar }}), - y = {{ yvar }}, - span = span, - maxit = maxit)$fit, + baseline = IDPmisc::rfbaseline( + x = 1:length({{ yvar }}), + y = {{ yvar }}, + span = span, + maxit = maxit + )$fit, {{ yvar }} := {{ yvar }} - baseline ) } diff --git a/R/checks.R b/R/checks.R index e9e95d3..ed6fbc7 100644 --- a/R/checks.R +++ b/R/checks.R @@ -11,8 +11,10 @@ check_missing_measures <- function(.data, loc) { check_single_selector <- function(res, arg) { if (length(res) != 1) { - msg <- paste0("The selection for `", arg, "` should only select a single ", - "column (", length(res), " columns were selected).") + msg <- paste0( + "The selection for `", arg, "` should only select a single ", + "column (", length(res), " columns were selected)." + ) rlang::abort(msg) } } @@ -39,14 +41,14 @@ pad_measure_dims <- function(x) { most_freq <- as.integer(names(num_unique)[1]) # Pad each measure so they all have 'most_freq' rows - x$.measures <- purrr::map(x$.measures, ~{ + x$.measures <- purrr::map(x$.measures, ~ { df <- .x if (nrow(df) < most_freq) { # Calculate how many rows to add rows_to_add <- most_freq - nrow(df) # Create a data frame with the required number of missing rows missing_rows <- - purrr::map_dfc(names(df), ~rep(NA_real_, rows_to_add)) %>% + purrr::map_dfc(names(df), ~ rep(NA_real_, rows_to_add)) %>% tibble::as_tibble() %>% setNames(names(df)) %>% suppressMessages() # suppress message about new column names diff --git a/R/helpers.R b/R/helpers.R index 9a8c979..69dabba 100644 --- a/R/helpers.R +++ b/R/helpers.R @@ -35,7 +35,7 @@ matrix_to_measure <- function(x, loc) { } x <- t(x) - x <- tibble::as_tibble(x, .name_repair = "minimal") + x <- tibble::as_tibble(x, .name_repair = "minimal") res <- purrr::map(x, ~ tibble::new_tibble(list(location = loc, value = .x))) unname(res) @@ -70,10 +70,12 @@ check_has_measure <- function(x, cl) { if (!any(names(x) == ".measures")) { msg <- - paste0("It appears that the measurements have not been converted ", - "for the inernal format. See `step_measure_input_long()` ", - "and `step_measure_input_wide()` and use these prior to ", - step_fn) + paste0( + "It appears that the measurements have not been converted ", + "for the inernal format. See `step_measure_input_long()` ", + "and `step_measure_input_wide()` and use these prior to ", + step_fn + ) rlang::abort(msg) } } diff --git a/R/input_long.R b/R/input_long.R index d7cd715..4644cd6 100644 --- a/R/input_long.R +++ b/R/input_long.R @@ -154,12 +154,16 @@ print.step_measure_input_long <- #' @export tidy.step_measure_input_long <- function(x, ...) { if (is_trained(x)) { - res <- tibble(terms = x$columns[!is.na(x$columns)], - value = na_dbl) + res <- tibble( + terms = x$columns[!is.na(x$columns)], + value = na_dbl + ) } else { term_names <- sel2char(x$terms) - res <- tibble(terms = term_names, - value = na_dbl) + res <- tibble( + terms = term_names, + value = na_dbl + ) } res$id <- x$id res @@ -173,5 +177,3 @@ rename_long_cols <- function(.data, val_chr, loc_chr) { } dplyr::rename(.data, dplyr::all_of(res)) } - - diff --git a/R/input_wide.R b/R/input_wide.R index 1246029..46d7c7b 100644 --- a/R/input_wide.R +++ b/R/input_wide.R @@ -66,27 +66,27 @@ #' @export step_measure_input_wide <- - function(recipe, - ..., - role = "measure", - trained = FALSE, - columns = NULL, - location_values = NULL, - skip = FALSE, - id = rand_id("measure_input_wide")) { - add_step( - recipe, - step_measure_input_wide_new( - terms = enquos(...), - trained = trained, - role = role, - columns = columns, - location_values = location_values, - skip = skip, - id = id - ) + function(recipe, + ..., + role = "measure", + trained = FALSE, + columns = NULL, + location_values = NULL, + skip = FALSE, + id = rand_id("measure_input_wide")) { + add_step( + recipe, + step_measure_input_wide_new( + terms = enquos(...), + trained = trained, + role = role, + columns = columns, + location_values = location_values, + skip = skip, + id = id ) - } + ) + } step_measure_input_wide_new <- function(terms, role, trained, columns, location_values, skip, id) { @@ -111,8 +111,10 @@ prep.step_measure_input_wide <- function(x, training, info = NULL, ...) { num_inputs <- length(col_names) num_loc <- length(x$location_values) if (num_inputs != num_loc) { - msg <- paste0(num_inputs, " columns were selected as inputs but ", - "`location_values` has ", num_loc, " values.") + msg <- paste0( + num_inputs, " columns were selected as inputs but ", + "`location_values` has ", num_loc, " values." + ) rlang::abort(msg) } # if @@ -133,7 +135,6 @@ prep.step_measure_input_wide <- function(x, training, info = NULL, ...) { #' @export bake.step_measure_input_wide <- function(object, new_data, ...) { - # TODO check to make sure that the nested tibble has the same number of rows # in case the nesting was bad wide_to_list(new_data, object$location_values, object$terms) @@ -151,12 +152,16 @@ print.step_measure_input_wide <- #' @export tidy.step_measure_input_wide <- function(x, ...) { if (is_trained(x)) { - res <- tibble(terms = x$columns, - value = na_dbl) + res <- tibble( + terms = x$columns, + value = na_dbl + ) } else { term_names <- sel2char(x$terms) - res <- tibble(terms = term_names, - value = na_dbl) + res <- tibble( + terms = term_names, + value = na_dbl + ) } res$id <- x$id res diff --git a/R/output_long.R b/R/output_long.R index a35da42..2f7b03a 100644 --- a/R/output_long.R +++ b/R/output_long.R @@ -19,11 +19,11 @@ #' data(glucose_bioreactors) #' bioreactors_small$batch_sample <- NULL #' -#' small_tr <- bioreactors_small[ 1:200,] -#' small_te <- bioreactors_small[201:210,] +#' small_tr <- bioreactors_small[1:200, ] +#' small_te <- bioreactors_small[201:210, ] #' #' small_rec <- -#' recipe(glucose ~ . , data = small_tr) %>% +#' recipe(glucose ~ ., data = small_tr) %>% #' update_role(batch_id, day, new_role = "id columns") %>% #' step_measure_input_wide(`400`:`3050`) %>% #' prep() @@ -103,8 +103,10 @@ bake.step_measure_output_long <- function(object, new_data, ...) { print.step_measure_output_long <- function(x, width = max(20, options()$width - 30), ...) { title <- "Restructure analytical measurements to long format" - print_step(rlang::quos(""), rlang::quos(""), - x$trained, title, width) + print_step( + rlang::quos(""), rlang::quos(""), + x$trained, title, width + ) invisible(x) } @@ -112,11 +114,15 @@ print.step_measure_output_long <- #' @export tidy.step_measure_output_long <- function(x, ...) { if (is_trained(x)) { - res <- tibble(terms = na_chr, - value = na_dbl) + res <- tibble( + terms = na_chr, + value = na_dbl + ) } else { - res <- tibble(terms = na_chr, - value = na_dbl) + res <- tibble( + terms = na_chr, + value = na_dbl + ) } res$id <- x$id res @@ -127,5 +133,3 @@ value_to_tibble <- function(x, prefix = "measure_") { colnames(x) <- recipes::names0(ncol(x), prefix = prefix) dplyr::as_tibble(x) } - - diff --git a/R/output_wide.R b/R/output_wide.R index c5f022e..b30ac98 100644 --- a/R/output_wide.R +++ b/R/output_wide.R @@ -18,11 +18,11 @@ #' data(glucose_bioreactors) #' bioreactors_small$batch_sample <- NULL #' -#' small_tr <- bioreactors_small[ 1:200,] -#' small_te <- bioreactors_small[201:210,] +#' small_tr <- bioreactors_small[1:200, ] +#' small_te <- bioreactors_small[201:210, ] #' #' small_rec <- -#' recipe(glucose ~ . , data = small_tr) %>% +#' recipe(glucose ~ ., data = small_tr) %>% #' update_role(batch_id, day, new_role = "id columns") %>% #' step_measure_input_wide(`400`:`3050`) %>% #' prep() @@ -87,7 +87,6 @@ prep.step_measure_output_wide <- function(x, training, info = NULL, ...) { #' @export bake.step_measure_output_wide <- function(object, new_data, ...) { - non_meas <- names(new_data) non_meas <- non_meas[non_meas != ".measures"] @@ -117,13 +116,16 @@ print.step_measure_output_wide <- #' @export tidy.step_measure_output_wide <- function(x, ...) { if (is_trained(x)) { - res <- tibble(terms = na_chr, - value = na_dbl) + res <- tibble( + terms = na_chr, + value = na_dbl + ) } else { - res <- tibble(terms = na_chr, - value = na_dbl) + res <- tibble( + terms = na_chr, + value = na_dbl + ) } res$id <- x$id res } - diff --git a/R/savitzky_golay.R b/R/savitzky_golay.R index 65e475b..153bbb4 100644 --- a/R/savitzky_golay.R +++ b/R/savitzky_golay.R @@ -61,29 +61,28 @@ #' ) %>% #' prep() #' } - step_measure_savitzky_golay <- - function(recipe, - role = NA, - trained = FALSE, - degree = 3, - window_size = 11, - differentiation_order = 0, - skip = FALSE, - id = rand_id("measure_savitzky_golay")) { - recipes::add_step( - recipe, - step_measure_savitzky_golay_new( - trained = trained, - role = role, - degree = degree, - window_size = window_size, - differentiation_order = differentiation_order, - skip = FALSE, - id = id - ) + function(recipe, + role = NA, + trained = FALSE, + degree = 3, + window_size = 11, + differentiation_order = 0, + skip = FALSE, + id = rand_id("measure_savitzky_golay")) { + recipes::add_step( + recipe, + step_measure_savitzky_golay_new( + trained = trained, + role = role, + degree = degree, + window_size = window_size, + differentiation_order = differentiation_order, + skip = FALSE, + id = id ) - } + ) + } step_measure_savitzky_golay_new <- function(role, trained, degree, window_size, differentiation_order, @@ -107,14 +106,14 @@ prep.step_measure_savitzky_golay <- function(x, training, info = NULL, ...) { cli::cli_abort("{.arg degree} to {.fn step_measure_savitzky_golay} should be a single integer greater than zero.") } - if (!is.numeric(x$differentiation_order) | length(x$differentiation_order) != 1 - | x$differentiation_order < 0) { + if (!is.numeric(x$differentiation_order) | length(x$differentiation_order) != 1 | + x$differentiation_order < 0) { cli::cli_abort("The {.arg differentiation_order} argument to {.fn step_measure_savitzky_golay} should be a single integer greater than -1.") } - if (!is.numeric(x$window_size) | length(x$window_size) != 1 - | x$window_size < 1 | x$window_size %% 2 != 1) { + if (!is.numeric(x$window_size) | length(x$window_size) != 1 | + x$window_size < 1 | x$window_size %% 2 != 1) { cli::cli_abort("The {.arg window_size} argument to {.fn step_measure_savitzky_golay} should be a single odd integer greater than 0.") @@ -130,10 +129,10 @@ prep.step_measure_savitzky_golay <- function(x, training, info = NULL, ...) { } # filter length w must be greater than polynomial order p if (x$window_size <= x$degree) { - x$window_size <- x$degree + 1 - if (x$window_size %% 2 == 0) { - x$window_size <- x$window_size + 1 - } + x$window_size <- x$degree + 1 + if (x$window_size %% 2 == 0) { + x$window_size <- x$window_size + 1 + } cli::cli_warn("The {.arg window_size} argument to {.fn step_measure_savitzky_golay} should be greater than or equal to {.arg degree}. The polynomial degree was increased @@ -153,7 +152,6 @@ prep.step_measure_savitzky_golay <- function(x, training, info = NULL, ...) { #' @export bake.step_measure_savitzky_golay <- function(object, new_data, ...) { - res <- .comp_savitzky_golay( new_data$.measures, @@ -170,8 +168,10 @@ bake.step_measure_savitzky_golay <- function(object, new_data, ...) { print.step_measure_savitzky_golay <- function(x, width = max(20, options()$width - 30), ...) { title <- "Savitzky-Golay preprocessing " - recipes::print_step("", "", - x$trained, title, width) + recipes::print_step( + "", "", + x$trained, title, width + ) invisible(x) } @@ -181,12 +181,16 @@ print.step_measure_savitzky_golay <- #' @export tidy.step_measure_savitzky_golay <- function(x, ...) { if (is_trained(x)) { - res <- tibble::tibble(terms = ".measure", - value = na_dbl) + res <- tibble::tibble( + terms = ".measure", + value = na_dbl + ) } else { term_names <- recipes::sel2char(x$terms) - res <- tibble::tibble(terms = term_names, - value = na_dbl) + res <- tibble::tibble( + terms = term_names, + value = na_dbl + ) } res$id <- x$id res @@ -235,4 +239,3 @@ required_pkgs.step_isomap <- function(x, ...) { matrix_to_measure(res, loc) } - diff --git a/tests/testthat/helpers-data.R b/tests/testthat/helpers-data.R index 69c1c6f..f5767ce 100644 --- a/tests/testthat/helpers-data.R +++ b/tests/testthat/helpers-data.R @@ -5,8 +5,10 @@ data_meat_long <- function() { library(tidyr) library(dplyr) - inds <- tibble(temp = grep("^x_", names(meats), value = TRUE), - ind = seq(1, 17, length.out = 100)) + inds <- tibble( + temp = grep("^x_", names(meats), value = TRUE), + ind = seq(1, 17, length.out = 100) + ) meats_2 <- meats %>% @@ -19,13 +21,15 @@ data_meat_long <- function() { full_join(inds, by = "temp") %>% select(-temp) - inds <- tibble(temp = grep("^x_", names(meats), value = TRUE), - ind = seq(1, 17, length.out = 100)) + inds <- tibble( + temp = grep("^x_", names(meats), value = TRUE), + ind = seq(1, 17, length.out = 100) + ) list( train = meats_2 %>% filter(.sample_num <= 200), - test = meats_2 %>% filter(.sample_num > 200) - ) + test = meats_2 %>% filter(.sample_num > 200) + ) } data_meat_wide <- function() { @@ -42,10 +46,7 @@ data_meat_wide <- function() { list( train = meats_2 %>% filter(.sample_num <= 200), - test = meats_2 %>% filter(.sample_num > 200), + test = meats_2 %>% filter(.sample_num > 200), ind = seq(1, 17, length.out = 100) ) } - - - diff --git a/tests/testthat/test-data-transformations.R b/tests/testthat/test-data-transformations.R index 6ff2a6c..fe5df1c 100644 --- a/tests/testthat/test-data-transformations.R +++ b/tests/testthat/test-data-transformations.R @@ -1,5 +1,4 @@ test_that("transform measure to matrix", { - rec <- recipe(water + fat + protein ~ ., data = meats_long) %>% update_role(id, new_role = "id") %>% @@ -18,11 +17,9 @@ test_that("transform measure to matrix", { meats_long[meats_long$id == ids[i], "transmittance"][[1]] ) } - }) test_that("transform matrix to measure", { - rec <- recipe(water + fat + protein ~ ., data = meats_long) %>% update_role(id, new_role = "id") %>% @@ -41,11 +38,9 @@ test_that("transform matrix to measure", { rec$template$.measures[[i]] ) } - }) test_that("transform tidy format", { - rec <- recipe(water + fat + protein ~ ., data = meats_long) %>% update_role(id, new_role = "id") %>% @@ -57,5 +52,4 @@ test_that("transform tidy format", { names(exp_df) <- c("location", "value", "sample_num") expect_equal(spect_df, exp_df) - }) diff --git a/tests/testthat/test_measure_input_long.R b/tests/testthat/test_measure_input_long.R index 1aa20c1..988559a 100644 --- a/tests/testthat/test_measure_input_long.R +++ b/tests/testthat/test_measure_input_long.R @@ -7,7 +7,7 @@ test_that("ingest long format data", { na_test$absorp[2] <- NA_real_ miss_train <- meats_data$train %>% dplyr::slice(-5) - miss_test <- meats_data$test %>% dplyr::slice(-5) + miss_test <- meats_data$test %>% dplyr::slice(-5) # ---------------------------------------------------------------------------- @@ -32,18 +32,18 @@ test_that("ingest long format data", { protein = numeric(0), .measures = list() ) - expect_equal(bake_1[0,], dat_ptype) + expect_equal(bake_1[0, ], dat_ptype) measure_ptype <- tibble::tibble( value = numeric(0), location = numeric(0) ) - expect_equal(bake_1$.measures[[1]][0,], measure_ptype) + expect_equal(bake_1$.measures[[1]][0, ], measure_ptype) expect_equal(nrow(bake_1), 200L) expect_equal(dim(bake_1$.measures[[1]]), c(100L, 2L)) bake_1_te <- bake(prep_1, new_data = meats_data$test) - expect_equal(bake_1_te$.measures[[1]][0,], measure_ptype) + expect_equal(bake_1_te$.measures[[1]][0, ], measure_ptype) expect_equal(nrow(bake_1_te), 15L) expect_equal(dim(bake_1_te$.measures[[1]]), c(100L, 2L)) diff --git a/tests/testthat/test_measure_input_wide.R b/tests/testthat/test_measure_input_wide.R index 154cb94..5887323 100644 --- a/tests/testthat/test_measure_input_wide.R +++ b/tests/testthat/test_measure_input_wide.R @@ -29,18 +29,18 @@ test_that("ingest wide format data", { protein = numeric(0), .measures = list() ) - expect_equal(bake_1[0,], dat_ptype) + expect_equal(bake_1[0, ], dat_ptype) measure_ptype <- tibble::tibble( value = numeric(0), location = numeric(0) ) - expect_equal(bake_1$.measures[[1]][0,], measure_ptype) + expect_equal(bake_1$.measures[[1]][0, ], measure_ptype) expect_equal(nrow(bake_1), 200L) expect_equal(dim(bake_1$.measures[[1]]), c(100L, 2L)) bake_1_te <- bake(prep_1, new_data = meats_data$test) - expect_equal(bake_1_te$.measures[[1]][0,], measure_ptype) + expect_equal(bake_1_te$.measures[[1]][0, ], measure_ptype) expect_equal(nrow(bake_1_te), 15L) expect_equal(dim(bake_1_te$.measures[[1]]), c(100L, 2L)) @@ -58,5 +58,4 @@ test_that("ingest wide format data", { prep(), error = TRUE ) - }) diff --git a/tests/testthat/test_measure_output_long.R b/tests/testthat/test_measure_output_long.R index c464712..f6cfbb4 100644 --- a/tests/testthat/test_measure_output_long.R +++ b/tests/testthat/test_measure_output_long.R @@ -1,7 +1,7 @@ test_that("output long format data", { meats_data <- data_meat_long() meats_train <- meats_data$train %>% filter(ind < 1.2) - meats_test <- meats_data$test %>% filter(ind < 1.2) + meats_test <- meats_data$test %>% filter(ind < 1.2) na_train <- meats_train na_train$absorp[1] <- NA_real_ @@ -9,7 +9,7 @@ test_that("output long format data", { na_test$absorp[1] <- NA_real_ miss_train <- meats_train %>% dplyr::slice(-2) - miss_test <- meats_test %>% dplyr::slice(-2) + miss_test <- meats_test %>% dplyr::slice(-2) # ---------------------------------------------------------------------------- @@ -36,11 +36,11 @@ test_that("output long format data", { .measure = numeric(0), .location = numeric(0) ) - expect_equal(bake_1[0,], dat_ptype) + expect_equal(bake_1[0, ], dat_ptype) expect_equal(nrow(bake_1), 400L) bake_1_te <- bake(prep_1, new_data = meats_test) - expect_equal(bake_1_te[0,], dat_ptype) + expect_equal(bake_1_te[0, ], dat_ptype) expect_equal(nrow(bake_1_te), 30L) ### missing rows @@ -70,7 +70,7 @@ test_that("output long format data", { rstudio = numeric(0), posit = numeric(0) ) - expect_equal(bake_2[0,], dat_ptype) + expect_equal(bake_2[0, ], dat_ptype) expect_equal(nrow(bake_2), 400L) ## missing values @@ -92,5 +92,4 @@ test_that("output long format data", { prep(), error = TRUE ) - }) diff --git a/tests/testthat/test_measure_output_wide.R b/tests/testthat/test_measure_output_wide.R index 2754cb2..fc84ef9 100644 --- a/tests/testthat/test_measure_output_wide.R +++ b/tests/testthat/test_measure_output_wide.R @@ -1,7 +1,7 @@ test_that("output wide format data", { meats_data <- data_meat_long() meats_train <- meats_data$train %>% filter(ind < 1.2) - meats_test <- meats_data$test %>% filter(ind < 1.2) + meats_test <- meats_data$test %>% filter(ind < 1.2) na_train <- meats_train na_train$absorp[1] <- NA_real_ @@ -9,7 +9,7 @@ test_that("output wide format data", { na_test$absorp[1] <- NA_real_ miss_train <- meats_train %>% dplyr::slice(-2) - miss_test <- meats_test %>% dplyr::slice(-2) + miss_test <- meats_test %>% dplyr::slice(-2) # ---------------------------------------------------------------------------- @@ -36,11 +36,11 @@ test_that("output wide format data", { measure_1.000000 = numeric(0), measure_1.161616 = numeric(0) ) - expect_equal(bake_1[0,], dat_ptype) + expect_equal(bake_1[0, ], dat_ptype) expect_equal(nrow(bake_1), 200L) bake_1_te <- bake(prep_1, new_data = meats_test) - expect_equal(bake_1_te[0,], dat_ptype) + expect_equal(bake_1_te[0, ], dat_ptype) expect_equal(nrow(bake_1_te), 15L) ### missing rows diff --git a/tests/testthat/test_savitzky_golay.R b/tests/testthat/test_savitzky_golay.R index eaf6a11..837585e 100644 --- a/tests/testthat/test_savitzky_golay.R +++ b/tests/testthat/test_savitzky_golay.R @@ -26,12 +26,12 @@ test_that("savitzky-golay computations", { measure:::measure_to_matrix() prosp_res <- prospectr::savitzkyGolay(spect_start, - m = grid$diffs[i], - p = grid$deg[i], - w = grid$wn[i]) + m = grid$diffs[i], + p = grid$deg[i], + w = grid$wn[i] + ) expect_equal(meas_res, prosp_res) } - }) test_that("savitzky-golay inputs", { @@ -50,60 +50,60 @@ test_that("savitzky-golay inputs", { bad_inputs <- tibble::tribble( ~diffs, ~deg, ~wn, - 2L, 1L, 5, - 3L, 1L, 5, - 3L, 2L, 5, - 0L, 5L, 5, - 1L, 5L, 5, - 2L, 5L, 5, - 3L, 5L, 5, - 0L, 6L, 5, - 1L, 6L, 5, - 2L, 6L, 5, - 3L, 6L, 5, - 0L, 1L, 10, - 1L, 1L, 10, - 2L, 1L, 10, - 3L, 1L, 10, - 0L, 2L, 10, - 1L, 2L, 10, - 2L, 2L, 10, - 3L, 2L, 10, - 0L, 3L, 10, - 1L, 3L, 10, - 2L, 3L, 10, - 3L, 3L, 10, - 0L, 4L, 10, - 1L, 4L, 10, - 2L, 4L, 10, - 3L, 4L, 10, - 0L, 5L, 10, - 1L, 5L, 10, - 2L, 5L, 10, - 3L, 5L, 10, - 0L, 6L, 10, - 1L, 6L, 10, - 2L, 6L, 10, - 3L, 6L, 10, - 2L, 1L, 15, - 3L, 1L, 15, - 3L, 2L, 15 + 2L, 1L, 5, + 3L, 1L, 5, + 3L, 2L, 5, + 0L, 5L, 5, + 1L, 5L, 5, + 2L, 5L, 5, + 3L, 5L, 5, + 0L, 6L, 5, + 1L, 6L, 5, + 2L, 6L, 5, + 3L, 6L, 5, + 0L, 1L, 10, + 1L, 1L, 10, + 2L, 1L, 10, + 3L, 1L, 10, + 0L, 2L, 10, + 1L, 2L, 10, + 2L, 2L, 10, + 3L, 2L, 10, + 0L, 3L, 10, + 1L, 3L, 10, + 2L, 3L, 10, + 3L, 3L, 10, + 0L, 4L, 10, + 1L, 4L, 10, + 2L, 4L, 10, + 3L, 4L, 10, + 0L, 5L, 10, + 1L, 5L, 10, + 2L, 5L, 10, + 3L, 5L, 10, + 0L, 6L, 10, + 1L, 6L, 10, + 2L, 6L, 10, + 3L, 6L, 10, + 2L, 1L, 15, + 3L, 1L, 15, + 3L, 2L, 15 ) for (i in 1:nrow(bad_inputs)) { - if (bad_inputs$wn[i] == 10) { - expect_snapshot({ - rec %>% - step_measure_savitzky_golay( - differentiation_order = bad_inputs$diffs[i], - window_size = bad_inputs$wn[i], - degree = bad_inputs$deg[i] - ) %>% - prep() - }, - error = TRUE) - + expect_snapshot( + { + rec %>% + step_measure_savitzky_golay( + differentiation_order = bad_inputs$diffs[i], + window_size = bad_inputs$wn[i], + degree = bad_inputs$deg[i] + ) %>% + prep() + }, + error = TRUE + ) } else { expect_snapshot_warning({ rec %>% @@ -115,8 +115,5 @@ test_that("savitzky-golay inputs", { prep() }) } - } - }) - diff --git a/vignettes/baseline.Rmd b/vignettes/baseline.Rmd index 5f596b0..e061c59 100644 --- a/vignettes/baseline.Rmd +++ b/vignettes/baseline.Rmd @@ -29,30 +29,32 @@ theme_set(theme_light()) Before we can perform baseline correction, we need to reshape the data. It is in a wide format where the columns represent the wavelength and the value is transmittance. The `step_baseline()` function operates on long format data. We can reshape this data with `{tidyr}`. ```{r} -meats2 <- +meats2 <- meats %>% - rowid_to_column(var = "id") %>% - pivot_longer(cols = starts_with("x_"), - names_to = "channel", - values_to = "transmittance") %>% + rowid_to_column(var = "id") %>% + pivot_longer( + cols = starts_with("x_"), + names_to = "channel", + values_to = "transmittance" + ) %>% mutate(channel = str_extract(channel, "[:digit:]+") %>% as.integer()) ``` ```{r} meats2 %>% - ggplot(aes(x = channel, y = transmittance, group = id)) + - geom_line(alpha = 0.5) + theme_light() + ggplot(aes(x = channel, y = transmittance, group = id)) + + geom_line(alpha = 0.5) + + theme_light() ``` ```{r} -meats2 %>% - group_by(id) %>% - subtract_rf_baseline(yvar = transmittance, span = 0.8) %>% - ggplot(aes(x = channel, color = water, group = id)) + +meats2 %>% + group_by(id) %>% + subtract_rf_baseline(yvar = transmittance, span = 0.8) %>% + ggplot(aes(x = channel, color = water, group = id)) + geom_line(aes(y = transmittance), alpha = 0.5) + - scale_color_viridis_c() + + scale_color_viridis_c() + ggtitle("") - ``` diff --git a/vignettes/recipes.Rmd b/vignettes/recipes.Rmd index 830740d..ae8ae5c 100644 --- a/vignettes/recipes.Rmd +++ b/vignettes/recipes.Rmd @@ -58,7 +58,7 @@ The recipe does not yet contain any steps. ```{r} rec_obj$steps -rec_obj_add_step <- rec_obj %>% +rec_obj_add_step <- rec_obj %>% step_impute_knn(all_predictors()) rec_obj_add_step$steps From 7e410b41c061f5ca6ea1529dbf90f2427df7f4f8 Mon Sep 17 00:00:00 2001 From: James Wade Date: Fri, 2 Feb 2024 14:39:43 -0500 Subject: [PATCH 11/11] [docs] style man pages --- man/step_measure_output_long.Rd | 6 +++--- man/step_measure_output_wide.Rd | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/man/step_measure_output_long.Rd b/man/step_measure_output_long.Rd index 94dfe1b..95367c8 100644 --- a/man/step_measure_output_long.Rd +++ b/man/step_measure_output_long.Rd @@ -54,11 +54,11 @@ library(dplyr) data(glucose_bioreactors) bioreactors_small$batch_sample <- NULL -small_tr <- bioreactors_small[ 1:200,] -small_te <- bioreactors_small[201:210,] +small_tr <- bioreactors_small[1:200, ] +small_te <- bioreactors_small[201:210, ] small_rec <- - recipe(glucose ~ . , data = small_tr) \%>\% + recipe(glucose ~ ., data = small_tr) \%>\% update_role(batch_id, day, new_role = "id columns") \%>\% step_measure_input_wide(`400`:`3050`) \%>\% prep() diff --git a/man/step_measure_output_wide.Rd b/man/step_measure_output_wide.Rd index abeca9d..2836518 100644 --- a/man/step_measure_output_wide.Rd +++ b/man/step_measure_output_wide.Rd @@ -52,11 +52,11 @@ library(dplyr) data(glucose_bioreactors) bioreactors_small$batch_sample <- NULL -small_tr <- bioreactors_small[ 1:200,] -small_te <- bioreactors_small[201:210,] +small_tr <- bioreactors_small[1:200, ] +small_te <- bioreactors_small[201:210, ] small_rec <- - recipe(glucose ~ . , data = small_tr) \%>\% + recipe(glucose ~ ., data = small_tr) \%>\% update_role(batch_id, day, new_role = "id columns") \%>\% step_measure_input_wide(`400`:`3050`) \%>\% prep()