Skip to content

Commit

Permalink
Merge pull request #14 from topepo/output-steps
Browse files Browse the repository at this point in the history
Output steps
  • Loading branch information
JamesHWade authored Jan 8, 2024
2 parents 3de6499 + 261c468 commit bd3a0c5
Show file tree
Hide file tree
Showing 18 changed files with 851 additions and 134 deletions.
14 changes: 8 additions & 6 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,12 @@ Authors@R:
comment = c(ORCID = "0000-0002-9740-1905"))
Description: Analytical measurements...
License: MIT + file LICENSE
URL: https://jameshwade.github.io/measure, https://github.com/jameshwade/measure
URL: https://jameshwade.github.io/measure,
https://github.com/jameshwade/measure
BugReports: https://github.com/jameshwade/measure/issues
Depends:
R (>= 3.5.0),
recipes
Imports:
cli,
dplyr,
Expand All @@ -28,12 +32,10 @@ Suggests:
tidymodels,
tidyverse,
withr
VignetteBuilder:
knitr
Config/testthat/edition: 3
Encoding: UTF-8
LazyData: true
Roxygen: list(markdown = TRUE)
RoxygenNote: 7.2.3
VignetteBuilder: knitr
Depends:
R (>= 3.5.0),
recipes
LazyData: true
10 changes: 10 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,25 @@

S3method(bake,step_measure_input_long)
S3method(bake,step_measure_input_wide)
S3method(bake,step_measure_output_long)
S3method(bake,step_measure_output_wide)
S3method(prep,step_measure_input_long)
S3method(prep,step_measure_input_wide)
S3method(prep,step_measure_output_long)
S3method(prep,step_measure_output_wide)
S3method(print,step_baseline)
S3method(print,step_measure_input_long)
S3method(print,step_measure_input_wide)
S3method(print,step_measure_output_long)
S3method(print,step_measure_output_wide)
S3method(tidy,step_measure_input_long)
S3method(tidy,step_measure_input_wide)
S3method(tidy,step_measure_output_long)
S3method(tidy,step_measure_output_wide)
export(step_measure_input_long)
export(step_measure_input_wide)
export(step_measure_output_long)
export(step_measure_output_wide)
export(subtract_rf_baseline)
import(recipes)
import(rlang)
Expand Down
19 changes: 19 additions & 0 deletions R/helpers.R
Original file line number Diff line number Diff line change
Expand Up @@ -12,3 +12,22 @@ add_location <- function(.data, loc) {
.measures = purrr::map(.measures, add_location_col, loc = loc)
)
}

# ------------------------------------------------------------------------------
# Make sue that data are in the correct format.

check_has_measure <- function(x, cl) {
step_fn <- as.character(cl[[1]])
step_fn <- gsub("prep\\.", "", step_fn)
step_fn <- paste0("`", step_fn, "()`.")


if (!any(names(x) == ".measures")) {
msg <-
paste0("It appears that the measurements have not been converted ",
"for the inernal format. See `step_measure_input_long()` ",
"and `step_measure_input_wide()` and use these prior to ",
step_fn)
rlang::abort(msg)
}
}
1 change: 1 addition & 0 deletions R/input_long.R
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
#' step that converts measures organized in a column for the analytical results
#' (and an option column of numeric indices) into an internal format used by
#' the package.
#' @family input/output steps
#' @inheritParams recipes::step_center
#' @param ... One or more selector functions to choose which _single_ column
#' contains the analytical measurements. The selection should be in the order
Expand Down
1 change: 1 addition & 0 deletions R/input_wide.R
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
#' step that converts measures organized in multiple columns into an internal
#' format used by the package.
#'
#' @family input/output steps
#' @inheritParams recipes::step_center
#' @param columns A character string of the selected variable names. This field
#' is a placeholder and will be populated once [prep()] is used.
Expand Down
131 changes: 131 additions & 0 deletions R/output_long.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,131 @@
#' Reorganize Measurements to Two Columns
#'
#' `step_measure_output_long` creates a *specification* of a recipe
#' step that converts measures to a format with columns for the measurement and
#' the corresponding location (i.e., "long" format).
#'
#' @family input/output steps
#' @inheritParams recipes::step_center
#' @param values_to A single character string for the column containing the
#' analytical maesurement.
#' @param location_to A single character string for the column containing the
#' location of the measurement (e.g. wavenumber or index).
#' @details
#' This step is designed convert analytical measurements from their internal
#' data structure to a two column format.
#' @examples
#' library(dplyr)
#'
#' data(glucose_bioreactors)
#' bioreactors_small$batch_sample <- NULL
#'
#' small_tr <- bioreactors_small[ 1:200,]
#' small_te <- bioreactors_small[201:210,]
#'
#' small_rec <-
#' recipe(glucose ~ . , data = small_tr) %>%
#' update_role(batch_id, day, new_role = "id columns") %>%
#' step_measure_input_wide(`400`:`3050`) %>%
#' prep()
#'
#' # Before reformatting:
#'
#' small_rec %>% bake(new_data = small_te)
#'
#' # After reformatting:
#'
#' output_rec <-
#' small_rec %>%
#' step_measure_output_long() %>%
#' prep()
#'
#' output_rec %>% bake(new_data = small_te)
#'
#' @export

step_measure_output_long <-
function(recipe,
values_to = ".measure",
location_to = ".location",
role = "predictor",
trained = FALSE,
skip = FALSE,
id = rand_id("measure_output_long")) {
add_step(
recipe,
step_measure_output_long_new(
values_to = values_to,
location_to = location_to,
trained = trained,
role = role,
skip = skip,
id = id
)
)
}

step_measure_output_long_new <-
function(values_to, location_to, role, trained, skip, id) {
step(
subclass = "measure_output_long",
values_to = values_to,
location_to = location_to,
role = role,
trained = trained,
skip = skip,
id = id
)
}

#' @export
prep.step_measure_output_long <- function(x, training, info = NULL, ...) {
check_has_measure(training, match.call())
step_measure_output_long_new(
values_to = x$values_to,
location_to = x$location_to,
role = x$role,
trained = TRUE,
skip = x$skip,
id = x$id
)
}

#' @export
bake.step_measure_output_long <- function(object, new_data, ...) {
rnm <- c(".measures.value", ".measures.location")
names(rnm) <- c(object$values_to, object$location_to)
new_data %>%
tidyr::unnest(cols = c(.measures), names_sep = ".") %>%
dplyr::rename(!!rnm)
}

#' @export
print.step_measure_output_long <-
function(x, width = max(20, options()$width - 30), ...) {
title <- "Restructure analytical measurements to long format"
print_step(rlang::quos("<internal data>"), rlang::quos("<internal data>"),
x$trained, title, width)
invisible(x)
}

#' @rdname tidy.recipe
#' @export
tidy.step_measure_output_long <- function(x, ...) {
if (is_trained(x)) {
res <- tibble(terms = na_chr,
value = na_dbl)
} else {
res <- tibble(terms = na_chr,
value = na_dbl)
}
res$id <- x$id
res
}

value_to_tibble <- function(x, prefix = "measure_") {
x <- matrix(x$value, nrow = 1)
colnames(x) <- recipes::names0(ncol(x), prefix = prefix)
dplyr::as_tibble(x)
}


127 changes: 127 additions & 0 deletions R/output_wide.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,127 @@
#' Reorganize Measurements to Separate Columns
#'
#' `step_measure_output_wide` creates a *specification* of a recipe
#' step that converts measures to multiple columns (i.e., "wide" format).
#' @family input/output steps
#' @inheritParams recipes::step_center
#' @param prefix A character string used to name the new columns.
#' @details
#' This step is designed convert analytical measurements from their internal
#' data structure to separate columns.
#'
#' Wide outputs can be helpful when you want to use standard recipes steps with
#' the measuresments, such as [recipes::step_pca()], [recipes::step_pls()], and
#' so on.
#' @examples
#' library(dplyr)
#'
#' data(glucose_bioreactors)
#' bioreactors_small$batch_sample <- NULL
#'
#' small_tr <- bioreactors_small[ 1:200,]
#' small_te <- bioreactors_small[201:210,]
#'
#' small_rec <-
#' recipe(glucose ~ . , data = small_tr) %>%
#' update_role(batch_id, day, new_role = "id columns") %>%
#' step_measure_input_wide(`400`:`3050`) %>%
#' prep()
#'
#' # Before reformatting:
#'
#' small_rec %>% bake(new_data = small_te)
#'
#' # After reformatting:
#'
#' output_rec <-
#' small_rec %>%
#' step_measure_output_wide() %>%
#' prep()
#'
#' output_rec %>% bake(new_data = small_te)
#'
#' @export

step_measure_output_wide <-
function(recipe,
prefix = "measure_",
role = "predictor",
trained = FALSE,
skip = FALSE,
id = rand_id("measure_output_wide")) {
add_step(
recipe,
step_measure_output_wide_new(
prefix = prefix,
trained = trained,
role = role,
skip = skip,
id = id
)
)
}

step_measure_output_wide_new <-
function(prefix, role, trained, skip, id) {
step(
subclass = "measure_output_wide",
prefix = prefix,
role = role,
trained = trained,
skip = skip,
id = id
)
}

#' @export
prep.step_measure_output_wide <- function(x, training, info = NULL, ...) {
check_has_measure(training, match.call())
step_measure_output_wide_new(
prefix = x$prefix,
role = x$role,
trained = TRUE,
skip = x$skip,
id = x$id
)
}

#' @export
bake.step_measure_output_wide <- function(object, new_data, ...) {

non_meas <- names(new_data)
non_meas <- non_meas[non_meas != ".measures"]

res <-
new_data %>%
tidyr::unnest(cols = c(.measures)) %>%
dplyr::mutate(location = gsub(" ", "0", format(location))) %>%
tidyr::pivot_wider(
id_cols = c(dplyr::all_of(non_meas)),
names_from = "location",
values_from = "value"
) %>%
dplyr::rename_with(~ paste0(object$prefix, .x), c(-dplyr::all_of(non_meas)))
}

#' @export
print.step_measure_output_wide <-
function(x, width = max(20, options()$width - 30), ...) {
title <- "Restructure analytical measurements to wide format"
print_step(rlang::quos("<internal data>"), rlang::quos("<internal data>"), x$trained, title, width)
invisible(x)
}

#' @rdname tidy.recipe
#' @export
tidy.step_measure_output_wide <- function(x, ...) {
if (is_trained(x)) {
res <- tibble(terms = na_chr,
value = na_dbl)
} else {
res <- tibble(terms = na_chr,
value = na_dbl)
}
res$id <- x$id
res
}

7 changes: 7 additions & 0 deletions man/step_measure_input_long.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

7 changes: 7 additions & 0 deletions man/step_measure_input_wide.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit bd3a0c5

Please sign in to comment.