Preserve correct label oder (#473)

* Preserve correct label oder * update news * capture more exceptions * lintr * lintr * update lintr
easystats · Dec 18, 2023 · 0e3991b · 0e3991b
1 parent 5f5c9c1
commit 0e3991b
Show file tree

Hide file tree

Showing 8 changed files with 109 additions and 39 deletions.
diff --git a/.lintr b/.lintr
@@ -13,6 +13,7 @@ linters: linters_with_defaults(
     todo_comment_linter = NULL,
     undesirable_function_linter(c("mapply" = NA, "sapply" = NA, "setwd" = NA)),
     undesirable_operator_linter = NULL,
+    if_not_else_linter(exceptions = character(0L)),
     unnecessary_concatenation_linter(allow_single_expression = FALSE),
     defaults = linters_with_tags(tags = NULL)
   )
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,7 +1,7 @@
 Type: Package
 Package: datawizard
 Title: Easy Data Wrangling and Statistical Transformations
-Version: 0.9.0.6
+Version: 0.9.0.7
 Authors@R: c(
     person("Indrajeet", "Patil", , "[email protected]", role = "aut",
            comment = c(ORCID = "0000-0003-1995-6531", Twitter = "@patilindrajeets")),

diff --git a/NEWS.md b/NEWS.md
@@ -12,6 +12,8 @@ BUG FIXES
 * `to_numeric()` now correctly deals with inversed factor levels when
   `preserve_levels = TRUE`.
 
+* `to_numeric()` inversed order of value labels when `dummy_factors = FALSE`.
+
 * `convert_to_na()` now preserves attributes for factors when `drop_levels = TRUE`.
 
 # datawizard 0.9.0

diff --git a/R/data_reverse.R b/R/data_reverse.R
@@ -93,24 +93,24 @@ reverse.numeric <- function(x,
   }
 
   # old minimum and maximum
-  min <- min(range)
-  max <- max(range)
+  min_value <- min(range)
+  max_value <- max(range)
 
   # check if a valid range (i.e. vector of length 2) is provided
   if (length(range) > 2) {
     insight::format_error(
       "`range` must be a numeric vector of length two, indicating lowest and highest value of the required range.",
-      sprintf("Did you want to provide `range = c(%g, %g)`?", min, max)
+      sprintf("Did you want to provide `range = c(%g, %g)`?", min_value, max_value)
     )
   }
 
-  new_min <- max
-  new_max <- min
+  new_min <- max_value
+  new_max <- min_value
 
-  out <- as.vector((new_max - new_min) / (max - min) * (x - min) + new_min)
+  out <- as.vector((new_max - new_min) / (max_value - min_value) * (x - min_value) + new_min)
 
   # labelled data?
-  out <- .set_back_labels(out, x)
+  out <- .set_back_labels(out, x, reverse_values = TRUE)
   out
 }
 
@@ -134,7 +134,9 @@ reverse.factor <- function(x, range = NULL, verbose = TRUE, ...) {
   # save for later use
   original_x <- x
 
-  if (!is.null(range)) {
+  if (is.null(range)) {
+    old_levels <- levels(x)
+  } else {
     # no missing values allowed
     if (anyNA(range)) {
       insight::format_error("`range` is not allowed to have missing values.")
@@ -180,16 +182,14 @@ reverse.factor <- function(x, range = NULL, verbose = TRUE, ...) {
     }
     old_levels <- range
     x <- factor(x, levels = range)
-  } else {
-    old_levels <- levels(x)
   }
 
   int_x <- as.integer(x)
   rev_x <- reverse(int_x, range = c(1, length(old_levels)))
   x <- factor(rev_x, levels = seq_len(length(old_levels)), labels = old_levels)
 
   # labelled data?
-  x <- .set_back_labels(x, original_x)
+  x <- .set_back_labels(x, original_x, reverse_values = TRUE)
 
   x
 }
@@ -225,16 +225,16 @@ reverse.grouped_df <- function(x,
   # create the new variables and updates "select", so new variables are processed
   if (!isFALSE(append)) {
     # process arguments
-    args <- .process_append(
+    arguments <- .process_append(
       x,
       select,
       append,
       append_suffix = "_r",
       preserve_value_labels = TRUE
     )
     # update processed arguments
-    x <- args$x
-    select <- args$select
+    x <- arguments$x
+    select <- arguments$select
   }
 
   x <- as.data.frame(x)
@@ -279,16 +279,16 @@ reverse.data.frame <- function(x,
   # create the new variables and updates "select", so new variables are processed
   if (!isFALSE(append)) {
     # process arguments
-    args <- .process_append(
+    arguments <- .process_append(
       x,
       select,
       append,
       append_suffix = "_r",
       preserve_value_labels = TRUE
     )
     # update processed arguments
-    x <- args$x
-    select <- args$select
+    x <- arguments$x
+    select <- arguments$select
   }
 
   # Transform the range so that it is a list now

diff --git a/R/to_numeric.R b/R/to_numeric.R
@@ -148,7 +148,7 @@ to_numeric.data.frame <- function(x,
 
 #' @export
 to_numeric.numeric <- function(x, verbose = TRUE, ...) {
-  .set_back_labels(as.numeric(x), x)
+  .set_back_labels(as.numeric(x), x, reverse_values = FALSE)
 }
 
 #' @export
@@ -233,9 +233,9 @@ to_numeric.factor <- function(x,
       }
       x <- factor(x_inverse)
     }
-    out <- .set_back_labels(as.numeric(as.character(x)), x)
+    out <- .set_back_labels(as.numeric(as.character(x)), x, reverse_values = FALSE)
   } else {
-    out <- .set_back_labels(as.numeric(x), x)
+    out <- .set_back_labels(as.numeric(x), x, reverse_values = FALSE)
   }
 
   # shift to requested starting value

diff --git a/R/utils_labels.R b/R/utils_labels.R
@@ -3,12 +3,29 @@
 # to the transformed vector
 
 #' @keywords internal
-.set_back_labels <- function(new, old, include_values = TRUE) {
+.set_back_labels <- function(new, old, include_values = TRUE, reverse_values = FALSE) {
   # labelled data?
   attr(new, "label") <- attr(old, "label", exact = TRUE)
-  labels <- attr(old, "labels", exact = TRUE)
-  if (isTRUE(include_values) && !is.null(labels)) {
-    attr(new, "labels") <- stats::setNames(rev(labels), names(labels))
+  value_labels <- attr(old, "labels", exact = TRUE)
+  # "include_values" is used to preserve value labels
+  if (isTRUE(include_values) && !is.null(value_labels)) {
+    if (reverse_values) {
+      # reverse values? Used for "reverse_scale()"
+      attr(new, "labels") <- stats::setNames(rev(value_labels), names(value_labels))
+    } else {
+      # keep value oder? Used for "to_numeric()"
+      if (is.numeric(new)) {
+        if (any(grepl("[^0-9]", value_labels))) {
+          # if we have any non-numeric characters, convert to numeric
+          attr(new, "labels") <- stats::setNames(as.numeric(as.factor(value_labels)), names(value_labels))
+        } else {
+          # if we have numeric, or "numeric character" (like "1", "2", "3" etc.)
+          attr(new, "labels") <- stats::setNames(as.numeric(value_labels), names(value_labels))
+        }
+      } else {
+        attr(new, "labels") <- stats::setNames(value_labels, names(value_labels))
+      }
+    }
   } else if (isFALSE(include_values)) {
     attr(new, "labels") <- NULL
   }

diff --git a/tests/testthat/test-data_to_numeric.R b/tests/testthat/test-data_to_numeric.R
@@ -189,3 +189,34 @@ test_that("to_numeric works with haven_labelled, convert many labels correctly",
     expect_identical(as.vector(table(x)), c(180L, 506L, 156L))
   })
 })
+
+
+test_that("to_numeric preserves correct label order", {
+  x <- factor(c(1, 2, 3, 4))
+  x <- assign_labels(x, values = c("one", "two", "three", "four"))
+  out <- to_numeric(x, dummy_factors = FALSE)
+  expect_identical(
+    attributes(out)$labels,
+    c(one = 1, two = 2, three = 3, four = 4)
+  )
+  # correctly reverse scale
+  out <- to_numeric(reverse_scale(x), dummy_factors = FALSE)
+  expect_identical(
+    attributes(out)$labels,
+    c(one = 4, two = 3, three = 2, four = 1)
+  )
+  # factor with alphabetical values
+  x <- factor(letters[1:4])
+  x <- assign_labels(x, values = c("one", "two", "three", "four"))
+  out <- to_numeric(x, dummy_factors = FALSE)
+  expect_identical(
+    attributes(out)$labels,
+    c(one = 1, two = 2, three = 3, four = 4)
+  )
+  # correctly reverse scale
+  out <- to_numeric(reverse_scale(x), dummy_factors = FALSE)
+  expect_identical(
+    attributes(out)$labels,
+    c(one = 4, two = 3, three = 2, four = 1)
+  )
+})
diff --git a/tests/testthat/test-standardize_models.R b/tests/testthat/test-standardize_models.R
@@ -47,24 +47,28 @@ test_that("transformations", {
   fit_scale2 <- lm(scale(mpg) ~ scale(exp(hp_100)), mt)
   expect_equal(
     effectsize::standardize_parameters(fit_exp, method = "refit")[2, 2],
-    unname(coef(fit_scale1)[2])
+    unname(coef(fit_scale1)[2]),
+    ignore_attr = TRUE
   )
 
   expect_equal(
     effectsize::standardize_parameters(fit_exp, method = "basic")[2, 2],
-    unname(coef(fit_scale2)[2])
+    unname(coef(fit_scale2)[2]),
+    ignore_attr = TRUE
   )
 
   skip_if_not_installed("insight", minimum_version = "0.10.0")
   d <- data.frame(
     time = as.factor(c(1, 2, 3, 4, 5, 1, 2, 3, 4, 5, 1, 2, 3, 4, 5, 1, 2, 3, 4, 5, 1, 2, 3, 4, 5, 1, 2, 3, 4, 5)),
     group = c(1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2),
-    sum = c(0, 5, 10, 15, 20, 0, 20, 25, 45, 50, 0, 5, 10, 15, 20, 0, 20, 25, 45, 50, 0, 5, 10, 15, 20, 0, 20, 25, 45, 50)
+    sum = c(0, 5, 10, 15, 20, 0, 20, 25, 45, 50, 0, 5, 10, 15, 20, 0, 20, 25, 45, 50, 0, 5, 10, 15, 20, 0, 20, 25, 45, 50) # nolint
   )
   m <- lm(log(sum + 1) ~ as.numeric(time) * group, data = d)
 
 
-  expect_message(out <- standardize(m))
+  expect_message({
+    out <- standardize(m)
+  })
   expect_identical(coef(m), c(
     `(Intercept)` = -0.4575, `as.numeric(time)` = 0.5492, group = 0.3379,
     `as.numeric(time):group` = 0.15779
@@ -98,12 +102,14 @@ test_that("weights", {
   stdREFIT <- effectsize::standardize_parameters(m, method = "refit")
   expect_equal(
     stdREFIT[[2]],
-    effectsize::standardize_parameters(m, method = "posthoc")[[2]]
+    effectsize::standardize_parameters(m, method = "posthoc")[[2]],
+    ignore_attr = TRUE
   )
 
   expect_equal(
     stdREFIT[[2]],
-    effectsize::standardize_parameters(m, method = "basic")[[2]]
+    effectsize::standardize_parameters(m, method = "basic")[[2]],
+    ignore_attr = TRUE
   )
 })
 
@@ -230,7 +236,9 @@ test_that("standardize mediation", {
   )
 
   out1 <- summary(standardize(med1))
-  expect_message(out2 <- summary(standardize(med2)))
+  expect_message({
+    out2 <- summary(standardize(med2))
+  })
   expect_identical(unlist(out1[c("d0", "d1", "z0", "z1", "n0", "n1", "tau.coef")]),
     unlist(out2[c("d0", "d1", "z0", "z1", "n0", "n1", "tau.coef")]),
     tolerance = 0.1
@@ -266,13 +274,22 @@ test_that("offsets", {
 
   m <- lm(mpg ~ hp + offset(wt), data = mtcars)
 
-  expect_warning(mz1 <- standardize(m))
-  expect_warning(mz2 <- standardize(m, two_sd = TRUE))
+  expect_warning({
+    mz1 <- standardize(m)
+  })
+  expect_warning({
+    mz2 <- standardize(m, two_sd = TRUE)
+  })
   expect_identical(c(1, 2) * coef(mz1), coef(mz2))
 
 
   m <- glm(cyl ~ hp + offset(wt), family = poisson(), data = mtcars)
-  expect_warning(mz <- standardize(m), regexp = NA)
+  expect_warning(
+    {
+      mz <- standardize(m)
+    },
+    regexp = NA
+  )
 
   par1 <- parameters::model_parameters(mz)
   par2 <- effectsize::standardize_parameters(m, method = "basic")
@@ -288,10 +305,12 @@ test_that("brms", {
   skip_if_not_installed("brms")
 
   invisible(
-    capture.output(mod <- brms::brm(mpg ~ hp,
-      data = mtcars,
-      refresh = 0, chains = 1, silent = 2
-    ))
+    capture.output({
+      mod <- brms::brm(mpg ~ hp,
+        data = mtcars,
+        refresh = 0, chains = 1, silent = 2
+      )
+    })
   )
 
   expect_warning(