Skip to content

Commit

Permalink
improve handling
Browse files Browse the repository at this point in the history
  • Loading branch information
strengejacke committed Sep 7, 2023
1 parent 1b3b825 commit 35c4f13
Show file tree
Hide file tree
Showing 3 changed files with 106 additions and 27 deletions.
57 changes: 48 additions & 9 deletions R/recode_into.r
Original file line number Diff line number Diff line change
Expand Up @@ -20,10 +20,13 @@
#' recode patterns. If `FALSE`, former recoded cases will not be altered by later
#' recode patterns that would apply to those cases again. A warning message is
#' printed to alert such situations and to avoid unintentional recodings.
#' @param preserve_na Logical, if `TRUE` (default) and `default` is not `NA`,
#' missing values in the original variable will be set back to `NA` in the
#' recoded variable (unless overwritten by other recode patterns). If `FALSE`,
#' missing values in the original variable will be recoded to `default`.
#' @param preserve_na Logical, if `TRUE` and `default` is not `NA`, missing
#' values in the original variable will be set back to `NA` in the recoded
#' variable (unless overwritten by other recode patterns). If `FALSE`, missing
#' values in the original variable will be recoded to `default`. Setting
#' `preserve_na = TRUE` prevents unintentional overwriting of missing values
#' with `default`, which means that you won't find valid values where the
#' original data only had missing values. See 'Examples'.
#' @param verbose Toggle warnings.
#'
#' @return A vector with recoded values.
Expand Down Expand Up @@ -76,12 +79,37 @@
#' data = d,
#' default = 0
#' )
#'
#' # handling of missing values
#' d <- data.frame(
#' x = c(1, NA, 2, NA, 3, 4),
#' y = c(1, 11, 3, NA, 5, 6)
#' )
#' # first NA in x is overwritten by valid value from y
#' # we have no known value for second NA in x and y,
#' # thus we get one NA in the result
#' recode_into(
#' x <= 3 ~ 1,
#' y > 5 ~ 2,
#' data = d,
#' default = 0,
#' preserve_na = TRUE
#' )
#' # first NA in x is overwritten by valid value from y
#' # default value is used for second NA
#' recode_into(
#' x <= 3 ~ 1,
#' y > 5 ~ 2,
#' data = d,
#' default = 0,
#' preserve_na = FALSE
#' )
#' @export
recode_into <- function(...,
data = NULL,
default = NA,
overwrite = TRUE,
preserve_na = TRUE,
preserve_na = FALSE,
verbose = TRUE) {
dots <- list(...)

Expand Down Expand Up @@ -133,6 +161,9 @@ recode_into <- function(...,
)
}

# indicator to show message when replacing NA by default
# needed to show message only once
overwrite_NA_msg <- TRUE

# iterate all expressions
for (i in seq_len(n_params)) {
Expand Down Expand Up @@ -182,10 +213,18 @@ recode_into <- function(...,
# write new values into output vector
out[index] <- value
# set back missing values
if (any(missing_index) && !is.na(default) && preserve_na) {
# but only where we still have default values
# we don't want to overwrite already recoded values with NA
out[missing_index & out == default] <- NA
if (any(missing_index) && !is.na(default)) {
if (preserve_na) {
# but only where we still have default values
# we don't want to overwrite already recoded values with NA
out[missing_index & out == default] <- NA
} else if (overwrite_NA_msg && verbose) {
# don't show msg again
overwrite_NA_msg <- FALSE
insight::format_alert(
"Missing values in original variable are overwritten by default value. If you want to preserve missing values, set `preserve_na = TRUE`."
)
}
}
}

Expand Down
38 changes: 33 additions & 5 deletions man/recode_into.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

38 changes: 25 additions & 13 deletions tests/testthat/test-recode_into.R
Original file line number Diff line number Diff line change
Expand Up @@ -194,25 +194,37 @@ test_that("recode_into, make sure recode works with missing in original variable
d_recode_na$mpg > 20 & d_recode_na$cyl == 6 ~ 1,
d_recode_na$mpg <= 20 ~ 2,
d_recode_na$cyl == 4 ~ 3,
default = 0
default = 0,
preserve_na = TRUE
)
out2_recoded_na <- recode_into(
d_recode_na$mpg > 20 & d_recode_na$cyl == 6 ~ 1,
d_recode_na$mpg <= 20 ~ 2,
default = 0
)
out3_recoded_na <- recode_into(
d_recode_na$mpg > 20 & d_recode_na$cyl == 6 ~ 1,
d_recode_na$mpg <= 20 ~ 2,
d_recode_na$cyl == 4 ~ 3,
default = 0,
preserve_na = FALSE
preserve_na = TRUE
)
out4_recoded_na <- recode_into(
d_recode_na$mpg > 20 & d_recode_na$cyl == 6 ~ 1,
d_recode_na$mpg <= 20 ~ 2,
default = 0,
preserve_na = FALSE
expect_message(
{
out3_recoded_na <- recode_into(
d_recode_na$mpg > 20 & d_recode_na$cyl == 6 ~ 1,
d_recode_na$mpg <= 20 ~ 2,
d_recode_na$cyl == 4 ~ 3,
default = 0,
preserve_na = FALSE
)
},
regex = "Missing values in original variable"
)
expect_message(
{
out4_recoded_na <- recode_into(
d_recode_na$mpg > 20 & d_recode_na$cyl == 6 ~ 1,
d_recode_na$mpg <= 20 ~ 2,
default = 0,
preserve_na = FALSE
)
},
regex = "Missing values in original variable"
)
# one NA in mpg is overwritten by valid value from cyl, total 5 NA
expect_identical(
Expand Down

0 comments on commit 35c4f13

Please sign in to comment.