Skip to content

Commit

Permalink
Merge pull request #17 from JamesHWade/feature/pad-missing-values
Browse files Browse the repository at this point in the history
[Feature] Pad Missing Values
  • Loading branch information
JamesHWade authored Mar 3, 2024
2 parents bc0383a + 7e410b4 commit cdc7b5a
Show file tree
Hide file tree
Showing 31 changed files with 387 additions and 292 deletions.
15 changes: 1 addition & 14 deletions .github/workflows/R-CMD-check.yaml
Original file line number Diff line number Diff line change
@@ -1,9 +1,5 @@
# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples
# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help
#
# NOTE: This workflow is overkill for most R packages and
# check-standard.yaml is likely a better choice.
# usethis::use_github_action("check-standard") will install it.
on:
push:
branches: [main, master]
Expand All @@ -23,26 +19,17 @@ jobs:
matrix:
config:
- {os: macos-latest, r: 'release'}

- {os: windows-latest, r: 'release'}
# Use 3.6 to trigger usage of RTools35
- {os: windows-latest, r: '3.6'}
# use 4.1 to check with rtools40's older compiler
- {os: windows-latest, r: '4.1'}

- {os: ubuntu-latest, r: 'devel', http-user-agent: 'release'}
- {os: ubuntu-latest, r: 'release'}
- {os: ubuntu-latest, r: 'oldrel-1'}
- {os: ubuntu-latest, r: 'oldrel-2'}
- {os: ubuntu-latest, r: 'oldrel-3'}
- {os: ubuntu-latest, r: 'oldrel-4'}

env:
GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
R_KEEP_PKG_SOURCE: yes

steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4

- uses: r-lib/actions/setup-pandoc@v2

Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/pr-commands.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ jobs:
env:
GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4

- uses: r-lib/actions/pr-fetch@v2
with:
Expand Down Expand Up @@ -51,7 +51,7 @@ jobs:
env:
GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4

- uses: r-lib/actions/pr-fetch@v2
with:
Expand Down
6 changes: 3 additions & 3 deletions .github/workflows/test-coverage.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ jobs:
GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}

steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4

- uses: r-lib/actions/setup-r@v2
with:
Expand All @@ -31,7 +31,7 @@ jobs:
covr::codecov(
quiet = FALSE,
clean = FALSE,
install_path = file.path(Sys.getenv("RUNNER_TEMP"), "package")
install_path = file.path(normalizePath(Sys.getenv("RUNNER_TEMP"), winslash = "/"), "package")
)
shell: Rscript {0}

Expand All @@ -44,7 +44,7 @@ jobs:

- name: Upload test results
if: failure()
uses: actions/upload-artifact@v3
uses: actions/upload-artifact@v4
with:
name: coverage-test-failures
path: ${{ runner.temp }}/package
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -39,4 +39,4 @@ Config/testthat/edition: 3
Encoding: UTF-8
LazyData: true
Roxygen: list(markdown = TRUE)
RoxygenNote: 7.2.3.9000
RoxygenNote: 7.3.1
13 changes: 7 additions & 6 deletions R/baseline.R
Original file line number Diff line number Diff line change
Expand Up @@ -70,17 +70,18 @@ print.step_baseline <-
#'
#' @examples
#' meats_long %>% subtract_rf_baseline(yvar = transmittance)
subtract_rf_baseline <- function(data, yvar, span = 2/3, maxit = c(5, 5)){

subtract_rf_baseline <- function(data, yvar, span = 2 / 3, maxit = c(5, 5)) {
# rlang::arg_match0(as.character(rlang::enquo(yvar)), values = names(data))

data %>%
dplyr::mutate(
raw = {{ yvar }},
baseline = IDPmisc::rfbaseline(x = 1:length({{ yvar }}),
y = {{ yvar }},
span = span,
maxit = maxit)$fit,
baseline = IDPmisc::rfbaseline(
x = 1:length({{ yvar }}),
y = {{ yvar }},
span = span,
maxit = maxit
)$fit,
{{ yvar }} := {{ yvar }} - baseline
)
}
Expand Down
33 changes: 31 additions & 2 deletions R/checks.R
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,10 @@ check_missing_measures <- function(.data, loc) {

check_single_selector <- function(res, arg) {
if (length(res) != 1) {
msg <- paste0("The selection for `", arg, "` should only select a single ",
"column (", length(res), " columns were selected).")
msg <- paste0(
"The selection for `", arg, "` should only select a single ",
"column (", length(res), " columns were selected)."
)
rlang::abort(msg)
}
}
Expand All @@ -31,3 +33,30 @@ check_measure_dims <- function(x) {
}
invisible(NULL)
}

pad_measure_dims <- function(x) {
# Determine the most frequent number of rows
num_rows <- purrr::map_int(x$.measures, nrow)
num_unique <- sort(table(num_rows), decreasing = TRUE)
most_freq <- as.integer(names(num_unique)[1])

# Pad each measure so they all have 'most_freq' rows
x$.measures <- purrr::map(x$.measures, ~ {
df <- .x
if (nrow(df) < most_freq) {
# Calculate how many rows to add
rows_to_add <- most_freq - nrow(df)
# Create a data frame with the required number of missing rows
missing_rows <-
purrr::map_dfc(names(df), ~ rep(NA_real_, rows_to_add)) %>%
tibble::as_tibble() %>%
setNames(names(df)) %>%
suppressMessages() # suppress message about new column names
# Bind the missing rows to the original data frame
df <- bind_rows(df, missing_rows)
df
}
df
})
x
}
File renamed without changes.
12 changes: 7 additions & 5 deletions R/helpers.R
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ matrix_to_measure <- function(x, loc) {
}

x <- t(x)
x <- tibble::as_tibble(x, .name_repair = "minimal")
x <- tibble::as_tibble(x, .name_repair = "minimal")

res <- purrr::map(x, ~ tibble::new_tibble(list(location = loc, value = .x)))
unname(res)
Expand Down Expand Up @@ -70,10 +70,12 @@ check_has_measure <- function(x, cl) {

if (!any(names(x) == ".measures")) {
msg <-
paste0("It appears that the measurements have not been converted ",
"for the inernal format. See `step_measure_input_long()` ",
"and `step_measure_input_wide()` and use these prior to ",
step_fn)
paste0(
"It appears that the measurements have not been converted ",
"for the inernal format. See `step_measure_input_long()` ",
"and `step_measure_input_wide()` and use these prior to ",
step_fn
)
rlang::abort(msg)
}
}
38 changes: 25 additions & 13 deletions R/input_long.R
Original file line number Diff line number Diff line change
Expand Up @@ -7,14 +7,16 @@
#' @family input/output steps
#' @inheritParams recipes::step_center
#' @param ... One or more selector functions to choose which _single_ column
#' contains the analytical measurements. The selection should be in the order
#' of the measurement's profile.
#' contains the analytical measurements. The selection should be in the order
#' of the measurement's profile.
#' @param location One or more selector functions to choose which _single_
#' column has the locations of the analytical values.
#' column has the locations of the analytical values.
#' @param pad Whether to pad the measurements to ensure that they all have the
#' same number of values. This is useful when there are missing values in the
#' measurements.
#' @param columns A character vector of column names determined by the recipe.
#' @details
#' This step is designed for data in a format where there is a column for the
#' analytical measurement (e.g., absorption, etc.) and another with the
#' @details This step is designed for data in a format where there is a column
#' for the analytical measurement (e.g., absorption, etc.) and another with the
#' location of the value (e.g., wave number, etc.).
#'
#' `step_measure_input_long()` will collect those data and put them into a
Expand Down Expand Up @@ -48,6 +50,7 @@ step_measure_input_long <-
function(recipe,
...,
location,
pad = FALSE,
role = "measure",
trained = FALSE,
columns = NULL,
Expand All @@ -64,21 +67,23 @@ step_measure_input_long <-
role = role,
columns = columns,
location = location,
pad = pad,
skip = skip,
id = id
)
)
}

step_measure_input_long_new <-
function(terms, role, trained, columns, location, na_rm, skip, id) {
function(terms, role, trained, columns, location, pad, skip, id) {
step(
subclass = "measure_input_long",
terms = terms,
role = role,
trained = trained,
columns = columns,
location = location,
pad = pad,
skip = skip,
id = id
)
Expand All @@ -104,6 +109,7 @@ prep.step_measure_input_long <- function(x, training, info = NULL, ...) {
trained = TRUE,
columns = unname(c(value_name, loc_name)),
location = x$location,
pad = x$pad,
skip = x$skip,
id = x$id
)
Expand All @@ -125,6 +131,10 @@ bake.step_measure_input_long <- function(object, new_data, ...) {
tidyr::nest(.by = c(-value), .key = ".measures")
}

if (rlang::is_true(object$pad)) {
new_data <- pad_measure_dims(new_data)
}

check_measure_dims(new_data)

new_data
Expand All @@ -144,12 +154,16 @@ print.step_measure_input_long <-
#' @export
tidy.step_measure_input_long <- function(x, ...) {
if (is_trained(x)) {
res <- tibble(terms = x$columns[!is.na(x$columns)],
value = na_dbl)
res <- tibble(
terms = x$columns[!is.na(x$columns)],
value = na_dbl
)
} else {
term_names <- sel2char(x$terms)
res <- tibble(terms = term_names,
value = na_dbl)
res <- tibble(
terms = term_names,
value = na_dbl
)
}
res$id <- x$id
res
Expand All @@ -163,5 +177,3 @@ rename_long_cols <- function(.data, val_chr, loc_chr) {
}
dplyr::rename(.data, dplyr::all_of(res))
}


61 changes: 33 additions & 28 deletions R/input_wide.R
Original file line number Diff line number Diff line change
Expand Up @@ -66,30 +66,30 @@
#' @export

step_measure_input_wide <-
function(recipe,
...,
role = "measure",
trained = FALSE,
columns = NULL,
location_values = NULL,
skip = FALSE,
id = rand_id("measure_input_wide")) {
add_step(
recipe,
step_measure_input_wide_new(
terms = enquos(...),
trained = trained,
role = role,
columns = columns,
location_values = location_values,
skip = skip,
id = id
)
function(recipe,
...,
role = "measure",
trained = FALSE,
columns = NULL,
location_values = NULL,
skip = FALSE,
id = rand_id("measure_input_wide")) {
add_step(
recipe,
step_measure_input_wide_new(
terms = enquos(...),
trained = trained,
role = role,
columns = columns,
location_values = location_values,
skip = skip,
id = id
)
}
)
}

step_measure_input_wide_new <-
function(terms, role, trained, columns, location_values, na_rm, skip, id) {
function(terms, role, trained, columns, location_values, skip, id) {
step(
subclass = "measure_input_wide",
terms = terms,
Expand All @@ -111,8 +111,10 @@ prep.step_measure_input_wide <- function(x, training, info = NULL, ...) {
num_inputs <- length(col_names)
num_loc <- length(x$location_values)
if (num_inputs != num_loc) {
msg <- paste0(num_inputs, " columns were selected as inputs but ",
"`location_values` has ", num_loc, " values.")
msg <- paste0(
num_inputs, " columns were selected as inputs but ",
"`location_values` has ", num_loc, " values."
)
rlang::abort(msg)
}
# if
Expand All @@ -133,7 +135,6 @@ prep.step_measure_input_wide <- function(x, training, info = NULL, ...) {

#' @export
bake.step_measure_input_wide <- function(object, new_data, ...) {

# TODO check to make sure that the nested tibble has the same number of rows
# in case the nesting was bad
wide_to_list(new_data, object$location_values, object$terms)
Expand All @@ -151,12 +152,16 @@ print.step_measure_input_wide <-
#' @export
tidy.step_measure_input_wide <- function(x, ...) {
if (is_trained(x)) {
res <- tibble(terms = x$columns,
value = na_dbl)
res <- tibble(
terms = x$columns,
value = na_dbl
)
} else {
term_names <- sel2char(x$terms)
res <- tibble(terms = term_names,
value = na_dbl)
res <- tibble(
terms = term_names,
value = na_dbl
)
}
res$id <- x$id
res
Expand Down
Loading

0 comments on commit cdc7b5a

Please sign in to comment.