Skip to content

Commit

Permalink
fix issues with NA values in recodes
Browse files Browse the repository at this point in the history
  • Loading branch information
strengejacke committed Sep 7, 2023
1 parent 10599b2 commit 7d35261
Show file tree
Hide file tree
Showing 4 changed files with 48 additions and 3 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Type: Package
Package: datawizard
Title: Easy Data Wrangling and Statistical Transformations
Version: 0.8.0.8
Version: 0.8.0.9
Authors@R: c(
person("Indrajeet", "Patil", , "[email protected]", role = "aut",
comment = c(ORCID = "0000-0003-1995-6531", Twitter = "@patilindrajeets")),
Expand Down
3 changes: 3 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,9 @@ BUG FIXES
* Fixed issue in `recode_into()` where a possibly wrong case number was printed
in the warning when several recode patterns matched the same case.

* Fixed issue in `recode_into()` when original data contained `NA` values and
`NA` was not included in the recode pattern.

* Fixed issue in `data_filter()` where functions containing a `=` (e.g. when
naming arguments, like `grepl(pattern, x = a)`) were mistakenly seen as
faulty syntax.
Expand Down
12 changes: 11 additions & 1 deletion R/recode_into.r
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,11 @@ recode_into <- function(..., data = NULL, default = NA, overwrite = TRUE, verbos
index <- with(data, eval(dots[[i]][[2]]))
value <- with(data, eval(dots[[i]][[3]]))
}
# remember missing values, so we can add back later
missing_index <- is.na(index)
# make sure index has no missing values. when we have missing values in
# original expression, these are considered as "no match" and set to FALSE
index[is.na(index)] <- FALSE
# overwriting values? do more recode-patterns match the same case?
if (is.na(default)) {
already_exists <- !is.na(out[index])
Expand All @@ -144,7 +149,7 @@ recode_into <- function(..., data = NULL, default = NA, overwrite = TRUE, verbos
# save indices of overwritten cases
overwritten_cases <- which(index)[already_exists]
# tell user...
if (any(already_exists) && verbose) {
if (any(already_exists, na.rm = TRUE) && verbose) {
if (overwrite) {
msg <- paste(
"Several recode patterns apply to the same cases.",
Expand All @@ -164,7 +169,12 @@ recode_into <- function(..., data = NULL, default = NA, overwrite = TRUE, verbos
if (!overwrite) {
index[overwritten_cases] <- FALSE
}
# write new values into output vector
out[index] <- value
# set back missing values
if (any(missing_index) && !is.na(default)) {
out[missing_index & out == default] <- NA
}
}

out
Expand Down
34 changes: 33 additions & 1 deletion tests/testthat/test-recode_into.R
Original file line number Diff line number Diff line change
Expand Up @@ -170,7 +170,7 @@ test_that("recode_into, check differen input length", {
)
})

test_that("recode_into, check differen input length", {
test_that("recode_into, check different input length", {
x <- 1:5
y <- c(5, 2, 3, 1, 4)
expect_warning(
Expand All @@ -184,3 +184,35 @@ test_that("recode_into, check differen input length", {
regexp = "Several recode patterns"
)
})

test_that("recode_into, make sure recode works with missing in original variable", {
  # Work on a test-local copy of `mtcars` with missing values injected into
  # both variables that appear in the recode patterns.
  mtcars$mpg <- replace(mtcars$mpg, c(3, 10, 12, 15, 16), NA)
  mtcars$cyl <- replace(mtcars$cyl, c(2, 15, 16), NA)

  # Three patterns: a case that is missing in `mpg` can still be recoded by
  # the `cyl`-only pattern, which leaves 5 NA in total.
  expected_three_patterns <- c(
    1, NA, 3, 1, 2, 2, 2, 3, 3, NA, 2, NA, 2, 2, NA, NA, 2, 3,
    3, 3, 3, 2, 2, 2, 2, 3, 3, 3, 2, 2, 2, 3
  )
  # Two patterns: without the `cyl`-only pattern nothing fills the missing
  # `mpg` case, so 6 NA remain and unmatched cases receive the default.
  expected_two_patterns <- c(
    1, NA, NA, 1, 2, 2, 2, 0, 0, NA, 2, NA, 2, 2, NA, NA, 2, 0,
    0, 0, 0, 2, 2, 2, 2, 0, 0, 0, 2, 2, 2, 0
  )

  three_patterns <- recode_into(
    mtcars$mpg > 20 & mtcars$cyl == 6 ~ 1,
    mtcars$mpg <= 20 ~ 2,
    mtcars$cyl == 4 ~ 3,
    default = 0
  )
  two_patterns <- recode_into(
    mtcars$mpg > 20 & mtcars$cyl == 6 ~ 1,
    mtcars$mpg <= 20 ~ 2,
    default = 0
  )

  expect_identical(three_patterns, expected_three_patterns)
  expect_identical(two_patterns, expected_two_patterns)
})

0 comments on commit 7d35261

Please sign in to comment.