Merge branch 'main' into docs_smoothness

easystats · Sep 18, 2024 · 442689d · 442689d
2 parents e605941 + b4343f0
commit 442689d
Show file tree

Hide file tree

Showing 21 changed files with 1,039 additions and 477 deletions.
diff --git a/.Rbuildignore b/.Rbuildignore
@@ -48,3 +48,5 @@ references.bib
 ^CRAN-SUBMISSION$
 docs
 ^.dev$
+^vignettes/s.
+^vignettes/t.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,7 +1,7 @@
 Type: Package
 Package: datawizard
 Title: Easy Data Wrangling and Statistical Transformations
-Version: 0.11.0.4
+Version: 0.12.3.4
 Authors@R: c(
     person("Indrajeet", "Patil", , "[email protected]", role = "aut",
            comment = c(ORCID = "0000-0003-1995-6531", Twitter = "@patilindrajeets")),
@@ -21,10 +21,10 @@ Authors@R: c(
     person("Robert", "Garrett", , "[email protected]", role = "rev")
   )
 Maintainer: Etienne Bacher <[email protected]>
-Description: A lightweight package to assist in key steps involved in any data 
-    analysis workflow: (1) wrangling the raw data to get it in the needed form, 
-    (2) applying preprocessing steps and statistical transformations, and 
-    (3) compute statistical summaries of data properties and distributions. 
+Description: A lightweight package to assist in key steps involved in any data
+    analysis workflow: (1) wrangling the raw data to get it in the needed form,
+    (2) applying preprocessing steps and statistical transformations, and
+    (3) compute statistical summaries of data properties and distributions.
     It is also the data wrangling backend for packages in 'easystats' ecosystem.
     References: Patil et al. (2022) <doi:10.21105/joss.04684>.
 License: MIT + file LICENSE
@@ -33,10 +33,10 @@ BugReports: https://github.com/easystats/datawizard/issues
 Depends:
     R (>= 3.6)
 Imports:
-    insight (>= 0.20.0),
+    insight (>= 0.20.3),
     stats,
     utils
-Suggests: 
+Suggests:
     bayestestR,
     boot,
     brms,
@@ -49,7 +49,6 @@ Suggests:
     ggplot2 (>= 3.5.0),
     gt,
     haven,
-    htmltools,
     httr,
     knitr,
     lme4,
@@ -68,12 +67,13 @@ Suggests:
     tibble,
     tidyr,
     withr
-VignetteBuilder: 
+VignetteBuilder:
     knitr
 Encoding: UTF-8
 Language: en-US
 Roxygen: list(markdown = TRUE)
-RoxygenNote: 7.3.1
+RoxygenNote: 7.3.2
 Config/testthat/edition: 3
 Config/testthat/parallel: true
 Config/Needs/website: easystats/easystatstemplate
+Remotes: easystats/insight
diff --git a/NEWS.md b/NEWS.md
@@ -1,4 +1,43 @@
-# datawizard 0.11.0.1
+# datawizard (development)
+
+BREAKING CHANGES
+
+* `data_rename()` now errors when the `replacement` argument contains `NA` values
+  or empty strings (#539).
+
+CHANGES
+
+* The `pattern` argument in `data_rename()` can also be a named vector. In this
+  case, names are used as values for the `replacement` argument (i.e. `pattern`
+  can be a character vector using `<new name> = "<old name>"`).
+
+* Minor additions to `reshape_ci()` to work with forthcoming changes in the
+  `{bayestestR}` package.
+
+# datawizard 0.12.3
+
+CHANGES
+
+* `demean()` (and `degroup()`) now also work for nested designs, if argument
+  `nested = TRUE` and `by` specifies more than one variable (#533).
+
+* Vignettes are no longer provided in the package; they are now only available
+  on the website. There is only one "Overview" vignette available in the package,
+  which contains links to the other vignettes on the website. This is because
+  there are CRAN errors occurring when building vignettes on macOS and we couldn't
+  determine the cause after multiple patch releases (#534).
+
+# datawizard 0.12.2
+
+* Remove `htmltools` from `Suggests` in an attempt to fix an error in CRAN
+  checks due to failures to build a vignette (#528).
+
+# datawizard 0.12.1
+
+This is a patch release to fix one error on CRAN checks occurring because of a
+missing package namespace in one of the vignettes.
+
+# datawizard 0.12.0
 
 BREAKING CHANGES
 
@@ -21,6 +60,10 @@ CHANGES
   frame, where the first column contains name of the variable for which
   frequencies were calculated, and the second column contains the frequency table.
 
+* `demean()` (and `degroup()`) now also work for cross-classified designs, or
+  more generally, for data with multiple grouping or cluster variables (i.e.
+  `by` can now specify more than one variable).
+
 # datawizard 0.11.0
 
 BREAKING CHANGES
@@ -59,8 +102,8 @@ BREAKING CHANGES
 
 * The following arguments were deprecated in 0.5.0 and are now removed:
 
-  * in `data_to_wide()`: `colnames_from`, `rows_from`, `sep` 
-  * in `data_to_long()`: `colnames_to` 
+  * in `data_to_wide()`: `colnames_from`, `rows_from`, `sep`
+  * in `data_to_long()`: `colnames_to`
   * in `data_partition()`: `training_proportion`
 
 NEW FUNCTIONS
@@ -79,7 +122,7 @@ CHANGES
   argument, to compute weighted frequency tables. `include_na` allows including
   or omitting missing values from the table. Furthermore, a `by` argument was added,
   to compute crosstables (#479, #481).
-  
+
 # datawizard 0.9.1
 
 CHANGES
@@ -130,7 +173,7 @@ CHANGES
 
 * `unnormalize()` and `unstandardize()` now work with grouped data (#415).
 
-* `unnormalize()` now errors instead of emitting a warning if it doesn't have the 
+* `unnormalize()` now errors instead of emitting a warning if it doesn't have the
   necessary info (#415).
 
 BUG FIXES
@@ -153,7 +196,7 @@ BUG FIXES
 
 * Fixed issue in `data_filter()` where functions containing a `=` (e.g. when
   naming arguments, like `grepl(pattern, x = a)`) were mistakenly seen as
-  faulty syntax. 
+  faulty syntax.
 
 * Fixed issue in `empty_column()` for strings with invalid multibyte strings.
   For such data frames or files, `empty_column()` or `data_read()` no longer
@@ -190,14 +233,14 @@ CHANGES
 
 NEW FUNCTIONS
 
-* `rowid_as_column()` to complement `rownames_as_column()` (and to mimic 
-  `tibble::rowid_to_column()`). Note that its behavior is different from 
+* `rowid_as_column()` to complement `rownames_as_column()` (and to mimic
+  `tibble::rowid_to_column()`). Note that its behavior is different from
   `tibble::rowid_to_column()` for grouped data. See the Details section in the
   docs.
 
 * `data_unite()`, to merge values of multiple variables into one new variable.
 
-* `data_separate()`, as counterpart to `data_unite()`, to separate a single 
+* `data_separate()`, as counterpart to `data_unite()`, to separate a single
   variable into multiple new variables.
 
 * `data_modify()`, to create new variables, or modify or remove existing
@@ -220,7 +263,7 @@ BUG FIXES
 
 * `center()` and `standardize()` did not work for grouped data frames (of class
   `grouped_df`) when `force = TRUE`.
-  
+
 * The `data.frame` method of `describe_distribution()` returns `NULL` instead of
   an error if no valid variables were passed (for example a factor variable with
   `include_factors = FALSE`) (#421).
@@ -248,12 +291,12 @@ BUG FIXES
 
 # datawizard 0.7.0
 
-BREAKING CHANGES 
+BREAKING CHANGES
 
 * In selection patterns, expressions like `-var1:var3` to exclude all variables
   between `var1` and `var3` are no longer accepted. The correct expression is
   `-(var1:var3)`. This is for 2 reasons:
-  
+
   * to be consistent with the behavior for numerics (`-1:2` is not accepted but
     `-(1:2)` is);
   * to be consistent with `dplyr::select()`, which throws a warning and only
@@ -265,8 +308,8 @@ NEW FUNCTIONS
   or more variables into a new variable.
 
 * `mean_sd()` and `median_mad()` for summarizing vectors to their mean (or
-  median) and a range of one SD (or MAD) above and below.  
-  
+  median) and a range of one SD (or MAD) above and below.
+
 * `data_write()` as counterpart to `data_read()`, to write data frames into
   CSV, SPSS, SAS, Stata files and many other file types. One advantage over
   existing functions to write data in other packages is that labelled (numeric)
@@ -282,8 +325,8 @@ MINOR CHANGES
 
 * `data_rename()` gets a `verbose` argument.
 * `winsorize()` now errors if the threshold is incorrect (previously, it provided
-  a warning and returned the unchanged data). The argument `verbose` is now 
-  useless but is kept for backward compatibility. The documentation now contains   
+  a warning and returned the unchanged data). The argument `verbose` is now
+  useless but is kept for backward compatibility. The documentation now contains
   details about the valid values for `threshold` (#357).
 * In all functions that have arguments `select` and/or `exclude`, there is now
   one warning per misspelled variable. The previous behavior was to have only one
@@ -304,7 +347,7 @@ BUG FIXES
 * Fix unexpected warning in `convert_na_to()` when `select` is a list (#352).
 * Fixed issue with correct labelling of numeric variables with more than nine
   unique values and associated value labels.
-  
+
 
 # datawizard 0.6.5
 
@@ -336,7 +379,7 @@ NEW FUNCTIONS
 * `data_codebook()`: to generate codebooks of data frames.
 
 * New functions to deal with duplicates: `data_duplicated()` (keep all duplicates,
-  including the first occurrence) and `data_unique()` (returns the data, excluding 
+  including the first occurrence) and `data_unique()` (returns the data, excluding
   all duplicates except one instance of each, based on the selected method).
 
 MINOR CHANGES
@@ -346,15 +389,15 @@ MINOR CHANGES
 * The `include_bounds` argument in `normalize()` can now also be a numeric
   value, defining the limit to the upper and lower bound (i.e. the distance
   to 1 and 0).
-  
-* `data_filter()` now works with grouped data. 
+
+* `data_filter()` now works with grouped data.
 
 BUG FIXES
 
 * `data_read()` no longer prints message for empty columns when the data
   actually had no empty columns.
-  
- * `data_to_wide()` now drops columns that are not in `id_cols` (if specified), 
+
+ * `data_to_wide()` now drops columns that are not in `id_cols` (if specified),
   `names_from`, or `values_from`. This is the behaviour observed in `tidyr::pivot_wider()`.
 
 # datawizard 0.6.3
@@ -786,4 +829,3 @@ NEW FUNCTIONS
 # datawizard 0.1.0
 
 * First release.
-
diff --git a/R/data_rename.R b/R/data_rename.R
@@ -13,11 +13,15 @@
 #' @param pattern Character vector. For `data_rename()`, indicates columns that
 #'   should be selected for renaming. Can be `NULL` (in which case all columns
 #'   are selected). For `data_addprefix()` or `data_addsuffix()`, a character
-#'   string, which will be added as prefix or suffix to the column names.
+#'   string, which will be added as prefix or suffix to the column names. For
+#'   `data_rename()`, `pattern` can also be a named vector. In this case, names
+#'   are used as values for the `replacement` argument (i.e. `pattern` can be a
+#'   character vector using `<new name> = "<old name>"` and argument `replacement`
+#'   will be ignored then).
 #' @param replacement Character vector. Indicates the new name of the columns
 #'   selected in `pattern`. Can be `NULL` (in which case columns are numbered
 #'   in sequential order). If not `NULL`, `pattern` and `replacement` must be
-#'   of the same length.
+#'   of the same length. If `pattern` is a named vector, `replacement` is ignored.
 #' @param rows Vector of row names.
 #' @param safe Do not throw error if for instance the variable to be
 #'   renamed/removed doesn't exist.
@@ -33,12 +37,14 @@
 #' head(data_rename(iris, "FakeCol", "length")) # This doesn't
 #' head(data_rename(iris, c("Sepal.Length", "Sepal.Width"), c("length", "width")))
 #'
+#' # use named vector to rename
+#' head(data_rename(iris, c(length = "Sepal.Length", width = "Sepal.Width")))
+#'
 #' # Reset names
 #' head(data_rename(iris, NULL))
 #'
 #' # Change all
 #' head(data_rename(iris, replacement = paste0("Var", 1:5)))
-#'
 #' @seealso
 #' - Functions to rename stuff: [data_rename()], [data_rename_rows()], [data_addprefix()], [data_addsuffix()]
 #' - Functions to reorder or remove columns: [data_reorder()], [data_relocate()], [data_remove()]
@@ -66,11 +72,44 @@ data_rename <- function(data,
     insight::format_error("Argument `pattern` must be of type character.")
   }
 
+  # check if `pattern` has names, and if so, use as "replacement"
+  if (!is.null(names(pattern))) {
+    replacement <- names(pattern)
+  }
+
   # name columns 1, 2, 3 etc. if no replacement
   if (is.null(replacement)) {
     replacement <- paste0(seq_along(pattern))
   }
 
+  # coerce to character
+  replacement <- as.character(replacement)
+
+  # check if `replacement` has no empty strings and no NA values
+  invalid_replacement <- is.na(replacement) | !nzchar(replacement)
+  if (any(invalid_replacement)) {
+    if (is.null(names(pattern))) {
+      # when user did not match `pattern` with `replacement`
+      msg <- c(
+        "`replacement` is not allowed to have `NA` or empty strings.",
+        sprintf(
+          "Following values in `pattern` have no match in `replacement`: %s",
+          toString(pattern[invalid_replacement])
+        )
+      )
+    } else {
+      # when user did not name all elements of `pattern`
+      msg <- c(
+        "Either name all elements of `pattern` or use `replacement`.",
+        sprintf(
+          "Following values in `pattern` were not named: %s",
+          toString(pattern[invalid_replacement])
+        )
+      )
+    }
+    insight::format_error(msg)
+  }
+
   # if duplicated names in replacement, append ".2", ".3", etc. to duplicates
   # ex: c("foo", "foo") -> c("foo", "foo.2")
   if (anyDuplicated(replacement) > 0L) {