easystats · etiennebacher · Sep 11, 2024 · Sep 9, 2024 · Sep 9, 2024 · Sep 9, 2024
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,7 +1,7 @@
 Type: Package
 Package: datawizard
 Title: Easy Data Wrangling and Statistical Transformations
-Version: 0.12.3.2
+Version: 0.12.3.3
 Authors@R: c(
     person("Indrajeet", "Patil", , "[email protected]", role = "aut",
            comment = c(ORCID = "0000-0003-1995-6531", Twitter = "@patilindrajeets")),
@@ -76,3 +76,4 @@ RoxygenNote: 7.3.2
 Config/testthat/edition: 3
 Config/testthat/parallel: true
 Config/Needs/website: easystats/easystatstemplate
+Remotes: easystats/insight
diff --git a/NEWS.md b/NEWS.md
@@ -1,5 +1,10 @@
 # datawizard (development)
 
+BREAKING CHANGES
+
+* `data_rename()` now errors when the `replacement` argument contains `NA` values
+  or empty strings (#539).
+
 CHANGES
 
 * The `pattern` argument in `data_rename()` can also be a named vector. In this

diff --git a/R/data_rename.R b/R/data_rename.R
@@ -16,11 +16,12 @@
 #'   string, which will be added as prefix or suffix to the column names. For
 #'   `data_rename()`, `pattern` can also be a named vector. In this case, names
 #'   are used as values for the `replacement` argument (i.e. `pattern` can be a
-#'   character vector using `<new name> = "<old name>"`).
+#'   character vector using `<new name> = "<old name>"`; the `replacement`
+#'   argument is then ignored).
 #' @param replacement Character vector. Indicates the new name of the columns
 #'   selected in `pattern`. Can be `NULL` (in which case column are numbered
 #'   in sequential order). If not `NULL`, `pattern` and `replacement` must be
-#'   of the same length.
+#'   of the same length. If `pattern` is a named vector, `replacement` is ignored.
 #' @param rows Vector of row names.
 #' @param safe Do not throw error if for instance the variable to be
 #'   renamed/removed doesn't exist.
@@ -44,14 +45,13 @@
 #'
 #' # Change all
 #' head(data_rename(iris, replacement = paste0("Var", 1:5)))
-#'
 #' @seealso
 #' - Functions to rename stuff: [data_rename()], [data_rename_rows()], [data_addprefix()], [data_addsuffix()]
 #' - Functions to reorder or remove columns: [data_reorder()], [data_relocate()], [data_remove()]
 #' - Functions to reshape, pivot or rotate data frames: [data_to_long()], [data_to_wide()], [data_rotate()]
 #' - Functions to recode data: [rescale()], [reverse()], [categorize()],
 #'   [recode_values()], [slide()]
 #' - Functions to standardize, normalize, rank-transform: [center()], [standardize()], [normalize()], [ranktransform()], [winsorize()]
 #' - Split and merge data frames: [data_partition()], [data_merge()]
 #' - Functions to find or select columns: [data_select()], [extract_column_names()]
 #' - Functions to filter rows: [data_match()], [data_filter()]
@@ -82,6 +82,34 @@
     replacement <- paste0(seq_along(pattern))
   }
 
+  # coerce to character
+  replacement <- as.character(replacement)
+
+  # check if `replacement` has no empty strings and no NA values
+  invalid_replacement <- is.na(replacement) | !nzchar(replacement)
+  if (any(invalid_replacement)) {
+    if (is.null(names(pattern))) {
+      # when user did not match `pattern` with `replacement`
+      msg <- c(
+        "`replacement` is not allowed to have `NA` or empty strings.",
+        sprintf(
+          "Following values in `pattern` have no match in `replacement`: %s",
+          toString(pattern[invalid_replacement])
+        )
+      )
+    } else {
+      # when user did not name all elements of `pattern`
+      msg <- c(
+        "Either name all elements of `pattern` or use `replacement`.",
+        sprintf(
+          "Following values in `pattern` were not named: %s",
+          toString(pattern[invalid_replacement])
+        )
+      )
+    }
+    insight::format_error(msg)
+  }
+
   # if duplicated names in replacement, append ".2", ".3", etc. to duplicates
   # ex: c("foo", "foo") -> c("foo", "foo.2")
   if (anyDuplicated(replacement) > 0L) {

diff --git a/R/standardize.models.R b/R/standardize.models.R
@@ -96,10 +96,10 @@
                                weights = TRUE,
                                verbose = TRUE,
                                include_response = TRUE,
                                update_expr,
                                ...) {
  m_info <- .get_model_info(x, ...)
  data <- insight::get_data(x, source = "mf", verbose = FALSE)

  if (isTRUE(attr(data, "is_subset"))) {
    insight::format_error("Cannot standardize a model fit with a 'subset = '.")
@@ -197,7 +197,7 @@
 
   ## ---- STANDARDIZE! ----
 
-  w <- insight::get_weights(x, na_rm = TRUE)
+  w <- insight::get_weights(x, remove_na = TRUE)
 
   data_std <- standardize(data[do_standardize],
     robust = robust,
@@ -365,7 +365,7 @@


  if (!is.null(covs)) {
    covs <- mapply(.rescale_fixed_values, covs, names(covs),
      SIMPLIFY = FALSE,
      MoreArgs = list(
        y_data = y_data, m_data = m_data,
@@ -391,7 +391,7 @@
  #
  #   control.value <- temp_vals[1]
  #   treat.value <- temp_vals[2]
  #   if (verbose) insight::format_alert("control and treatment values have been rescaled to their standardized scales.")
  # }

  if (verbose && !all(c(control.value, treat.value) %in% c(0, 1))) {
@@ -402,7 +402,7 @@
  }


  text <- utils::capture.output({
    model_std <- stats::update(x,
      model.y = y_std, model.m = m_std,
      # control.value = control.value, treat.value = treat.value
@@ -476,7 +476,7 @@

  # check if model has a response variable that should not be standardized.
  info$is_linear &&
    !info$family == "inverse.gaussian" &&
    !info$is_survival &&
    !info$is_censored


diff --git a/man/data_rename.Rd b/man/data_rename.Rd
diff --git a/man/text_format.Rd b/man/text_format.Rd
diff --git a/tests/testthat/test-data_rename.R b/tests/testthat/test-data_rename.R
@@ -28,11 +28,26 @@ test_that("data_rename returns a data frame", {
 test_that("data_rename: pattern must be of type character", {
   expect_error(
     data_rename(test, pattern = 1),
-    regexp = "Argument `pattern` must be of type character."
+    regexp = "Argument `pattern` must be of type character"
   )
   expect_error(
     data_rename(test, pattern = TRUE),
-    regexp = "Argument `pattern` must be of type character."
+    regexp = "Argument `pattern` must be of type character"
+  )
+})
+
+test_that("data_rename: replacement not allowed to have NA or empty strings", {
+  expect_error(
+    data_rename(test, pattern = c(test = "Species", "Sepal.Length")),
+    regexp = "Either name all elements of `pattern`"
+  )
+  expect_error(
+    data_rename(
+      test,
+      pattern = c("Species", "Sepal.Length"),
+      replacement = c("foo", NA_character_)
+    ),
+    regexp = "`replacement` is not allowed"
   )
 })