From ec161a269e568c95659c2ebdd77529c8445ea4d0 Mon Sep 17 00:00:00 2001 From: Daniel Date: Mon, 27 Nov 2023 20:13:01 +0100 Subject: [PATCH] Fix `to_numeric()` with inversed factor levels (#469) * Fix invers levels for to_numeric() * news * lintr * wordlist * lintr * lintr --- DESCRIPTION | 2 +- NEWS.md | 7 ++++++- R/data_tabulate.R | 24 ++++++++++++------------ R/to_numeric.R | 19 +++++++++++++------ inst/WORDLIST | 1 + tests/testthat/test-data_to_numeric.R | 19 ++++++++++++++----- 6 files changed, 47 insertions(+), 25 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index e83fcafc2..d930f6fe8 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Type: Package Package: datawizard Title: Easy Data Wrangling and Statistical Transformations -Version: 0.9.0.4 +Version: 0.9.0.5 Authors@R: c( person("Indrajeet", "Patil", , "patilindrajeet.science@gmail.com", role = "aut", comment = c(ORCID = "0000-0003-1995-6531", Twitter = "@patilindrajeets")), diff --git a/NEWS.md b/NEWS.md index d799a0d5b..e2f2ecdff 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,4 +1,4 @@ -# datawizard 0.9.0.9000 (development version) +# datawizard 0.9.0.x (development version) CHANGES @@ -7,6 +7,11 @@ CHANGES * `to_factor()` and `to_numeric()` now support class `haven_labelled`. +BUG FIXES + +* `to_numeric()` now correctly deals with inversed factor levels when + `preserve_levels = TRUE`. + # datawizard 0.9.0 NEW FUNCTIONS diff --git a/R/data_tabulate.R b/R/data_tabulate.R index 3b16932a3..7a56e6a87 100644 --- a/R/data_tabulate.R +++ b/R/data_tabulate.R @@ -179,10 +179,10 @@ data_tabulate.grouped_df <- function(x, for (i in seq_along(grps)) { rows <- grps[[i]] # save information about grouping factors - if (!is.null(group_variables)) { - group_variable <- group_variables[i, , drop = FALSE] - } else { + if (is.null(group_variables)) { group_variable <- NULL + } else { + group_variable <- group_variables[i, , drop = FALSE] } out <- c(out, data_tabulate( data_filter(x, rows), @@ -226,7 +226,7 @@ format.dw_data_tabulate <- function(x, format = "text", big_mark = NULL, ...) { # format data frame ftab <- insight::format_table(x, ...) ftab[] <- lapply(ftab, function(i) { - i[i == ""] <- ifelse(identical(format, "text"), "", "(NA)") + i[i == ""] <- ifelse(identical(format, "text"), "", "(NA)") # nolint i }) ftab$N <- gsub("\\.00$", "", ftab$N) @@ -347,10 +347,10 @@ print.dw_data_tabulates <- function(x, big_mark = NULL, ...) { } } else { x <- lapply(x, function(i) { - attr <- attributes(i) + i_attr <- attributes(i) i <- format(i, format = "text", big_mark = big_mark, ...) - i$Variable[attr$duplicate_varnames] <- "" - if (!is.null(i$Group)) i$Group[attr$duplicate_varnames] <- "" + i$Variable[i_attr$duplicate_varnames] <- "" + if (!is.null(i$Group)) i$Group[i_attr$duplicate_varnames] <- "" i[nrow(i) + 1, ] <- "" i }) @@ -375,9 +375,9 @@ print_html.dw_data_tabulates <- function(x, big_mark = NULL, ...) { print_html(x[[1]], big_mark = big_mark, ...) } else { x <- lapply(x, function(i) { - attr <- attributes(i) + i_attr <- attributes(i) i <- format(i, format = "html", big_mark = big_mark, ...) - i$Variable[attr$duplicate_varnames] <- "" + i$Variable[i_attr$duplicate_varnames] <- "" i }) @@ -401,10 +401,10 @@ print_md.dw_data_tabulates <- function(x, big_mark = NULL, ...) { print_md(x[[1]], big_mark = big_mark, ...) } else { x <- lapply(x, function(i) { - attr <- attributes(i) + i_attr <- attributes(i) i <- format(i, format = "markdown", big_mark = big_mark, ...) - i$Variable[attr$duplicate_varnames] <- "" - if (!is.null(i$Group)) i$Group[attr$duplicate_varnames] <- "" + i$Variable[i_attr$duplicate_varnames] <- "" + if (!is.null(i$Group)) i$Group[i_attr$duplicate_varnames] <- "" i[nrow(i) + 1, ] <- "" i }) diff --git a/R/to_numeric.R b/R/to_numeric.R index 80fb7db27..9a35f9130 100644 --- a/R/to_numeric.R +++ b/R/to_numeric.R @@ -74,7 +74,7 @@ to_numeric.data.frame <- function(x, return(x) } - attr <- attributes(x) + df_attr <- attributes(x) # evaluate arguments select <- .select_nse(select, @@ -91,7 +91,7 @@ to_numeric.data.frame <- function(x, # drop numerics, when append is not FALSE select <- colnames(x[select])[!vapply(x[select], is.numeric, FUN.VALUE = logical(1L))] # process arguments - args <- .process_append( + fun_args <- .process_append( x, select, append, @@ -99,8 +99,8 @@ to_numeric.data.frame <- function(x, keep_factors = TRUE ) # update processed arguments - x <- args$x - select <- args$select + x <- fun_args$x + select <- fun_args$select } out <- sapply( @@ -129,7 +129,7 @@ to_numeric.data.frame <- function(x, } # due to the special handling of dummy factors, we need to take care - # of appending the data here again. usually, "args$x" includes the appended + # of appending the data here again. usually, "fun_args$x" includes the appended # data, which does not work here... if (!isFALSE(append)) { @@ -141,7 +141,7 @@ to_numeric.data.frame <- function(x, } # add back custom attributes - out <- .replace_attrs(out, attr) + out <- .replace_attrs(out, df_attr) out } @@ -226,6 +226,13 @@ to_numeric.factor <- function(x, } names(out) <- levels(x) } else if (preserve_levels) { + if (is.unsorted(levels(x))) { + x_inverse <- rep(NA_real_, length(x)) + for (i in 1:nlevels(x)) { + x_inverse[x == levels(x)[i]] <- as.numeric(levels(x)[nlevels(x) - i + 1]) + } + x <- factor(x_inverse) + } out <- .set_back_labels(as.numeric(as.character(x)), x) } else { out <- .set_back_labels(as.numeric(x), x) diff --git a/inst/WORDLIST b/inst/WORDLIST index 01e138dd1..d48430478 100644 --- a/inst/WORDLIST +++ b/inst/WORDLIST @@ -62,6 +62,7 @@ ggplot's https interpretability interpretable +inversed joss labelled labelling diff --git a/tests/testthat/test-data_to_numeric.R b/tests/testthat/test-data_to_numeric.R index df43f401f..3e0a9d095 100644 --- a/tests/testthat/test-data_to_numeric.R +++ b/tests/testthat/test-data_to_numeric.R @@ -44,7 +44,6 @@ test_that("convert factor to numeric", { expect_snapshot(to_numeric(f)) }) - test_that("convert factor to numeric", { expect_identical(to_numeric(c("abc", "xyz")), c(1, 2)) expect_identical(to_numeric(c("123", "789")), c(123, 789)) @@ -52,7 +51,6 @@ test_that("convert factor to numeric", { expect_identical(to_numeric(c("1L", "2e-3", "ABC")), c(1, 2, 3)) }) - test_that("convert factor to numeric, dummy factors", { expect_identical( to_numeric(c("abc", "xyz"), dummy_factors = TRUE), @@ -66,7 +64,6 @@ test_that("convert factor to numeric, dummy factors", { ) }) - test_that("convert factor to numeric, append", { data(efc) expect_identical( @@ -94,13 +91,11 @@ test_that("convert factor to numeric, append", { ) }) - test_that("convert factor to numeric, all numeric", { data(mtcars) expect_identical(to_numeric(mtcars), mtcars) }) - test_that("convert factor to numeric, dummy factors, with NA", { x1 <- factor(rep(c("a", "b"), 3)) x2 <- factor(c("a", NA_character_, "a", "b", "a", "b")) @@ -153,6 +148,20 @@ test_that("convert factor to numeric, dummy factors, with NA", { expect_identical(nrow(to_numeric(x7, dummy_factors = TRUE)), length(x7)) }) +test_that("to_numeric, inverse factor levels", { + f <- c(0, 0, 1, 1, 1, 0) + x1 <- factor(f, levels = c(0, 1)) + x2 <- factor(f, levels = c(1, 0)) + out <- to_numeric(x1, dummy_factors = FALSE, preserve_levels = FALSE) + expect_identical(out, c(1, 1, 2, 2, 2, 1)) + out <- to_numeric(x2, dummy_factors = FALSE, preserve_levels = FALSE) + expect_identical(out, c(2, 2, 1, 1, 1, 2)) + out <- to_numeric(x1, dummy_factors = FALSE, preserve_levels = TRUE) + expect_identical(out, c(0, 0, 1, 1, 1, 0)) + out <- to_numeric(x2, dummy_factors = FALSE, preserve_levels = TRUE) + expect_identical(out, c(1, 1, 0, 0, 0, 1)) +}) + # select helpers ------------------------------ test_that("to_numeric regex", { expect_identical(