From d9a40dcb0014a6428f56b8c1d4c42d5c482d4506 Mon Sep 17 00:00:00 2001 From: Daniel Date: Sat, 18 May 2024 22:43:52 +0200 Subject: [PATCH] Rename `group`/`group_by` arguments into `by` (#502) * https://github.com/easystats/easystats/issues/404 * update snapshots * lintr, comments * fix demean() * fix means_by_group * fix * update news * fix rescale_weights * silence tests * Update NEWS.md Co-authored-by: Etienne Bacher <52219252+etiennebacher@users.noreply.github.com> * deprecation warnings * use insight remotes * Update NEWS.md Co-authored-by: Etienne Bacher <52219252+etiennebacher@users.noreply.github.com> * Also address #265 * update docs * update docs and tests * Update extract_column_names.R * update readme * trigger CI * revert commits related to aliases * version * news * other remnants * fix * do not use devel pkgdown * lintr * same --------- Co-authored-by: Etienne Bacher <52219252+etiennebacher@users.noreply.github.com> --- DESCRIPTION | 4 +- NEWS.md | 11 ++++ R/data_codebook.R | 24 +++---- R/data_partition.R | 24 ++++--- R/data_xtabulate.R | 4 +- R/demean.R | 76 +++++++++++----------- R/means_by_group.R | 67 +++++++++++-------- R/recode_values.R | 6 +- R/rescale_weights.R | 33 ++++++---- R/select_nse.R | 19 +++--- R/skewness_kurtosis.R | 14 +++- R/text_format.R | 16 ++--- README.Rmd | 2 +- README.md | 2 +- man/data_partition.Rd | 13 ++-- man/demean.Rd | 29 +++++---- man/means_by_group.Rd | 25 +++++-- man/rescale_weights.Rd | 12 ++-- tests/testthat/_snaps/data_partition.md | 4 +- tests/testthat/_snaps/demean.md | 2 +- tests/testthat/_snaps/rescale_weights.md | 2 +- tests/testthat/_snaps/text_format.md | 10 +-- tests/testthat/test-attributes.R | 4 +- tests/testthat/test-data_partition.R | 4 +- tests/testthat/test-demean.R | 20 ++++-- tests/testthat/test-labelled_data.R | 8 +-- tests/testthat/test-recode_into.R | 6 +- tests/testthat/test-rescale_weights.R | 18 ++--- tests/testthat/test-standardize_datagrid.R | 14 ++-- tests/testthat/test-text_format.R | 10 +-- 30 files changed, 284 insertions(+), 199 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 20b803319..ec594a4e7 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Type: Package Package: datawizard Title: Easy Data Wrangling and Statistical Transformations -Version: 0.10.0.3 +Version: 0.10.0.4 Authors@R: c( person("Indrajeet", "Patil", , "patilindrajeet.science@gmail.com", role = "aut", comment = c(ORCID = "0000-0003-1995-6531", Twitter = "@patilindrajeets")), @@ -69,7 +69,7 @@ Suggests: tidyr, withr Remotes: - easystats/modelbased + easystats/modelbased, easystats/insight VignetteBuilder: knitr Encoding: UTF-8 diff --git a/NEWS.md b/NEWS.md index f5e1a3543..1ceb53061 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,16 @@ # datawizard 0.10.1 +BREAKING CHANGES + +* Arguments named `group` or `group_by` are deprecated and will be removed + in a future release. Please use `by` instead. This affects the following + functions in *datawizard* (#502). + + * `data_partition()` + * `demean()` and `degroup()` + * `means_by_group()` + * `rescale_weights()` + CHANGES * `recode_into()` is more relaxed regarding checking the type of `NA` values. diff --git a/R/data_codebook.R b/R/data_codebook.R index 7608f08d5..d6fe46beb 100644 --- a/R/data_codebook.R +++ b/R/data_codebook.R @@ -232,9 +232,9 @@ data_codebook <- function(data, # add proportions, but not for ranges, since these are always 100% if (is_range) { - proportions <- "" + frq_proportions <- "" } else { - proportions <- sprintf("%.1f%%", round(100 * (frq / sum(frq)), 1)) + frq_proportions <- sprintf("%.1f%%", round(100 * (frq / sum(frq)), 1)) } # make sure we have not too long rows, e.g. for variables that @@ -245,9 +245,9 @@ data_codebook <- function(data, } if (length(frq) > max_values) { frq <- frq[1:max_values] - proportions <- proportions[1:max_values] + frq_proportions <- frq_proportions[1:max_values] frq[max_values] <- NA - proportions[max_values] <- NA + frq_proportions[max_values] <- NA } if (length(values) > max_values) { values <- values[1:max_values] @@ -273,7 +273,7 @@ data_codebook <- function(data, values, value_labels, frq, - proportions, + proportions = frq_proportions, stringsAsFactors = FALSE )) @@ -347,12 +347,12 @@ format.data_codebook <- function(x, format = "text", ...) { x$Prop[x$Prop == "NA" | is.na(x$Prop)] <- "" # align only for text format if (identical(format, "text")) { - x$Prop[x$Prop != ""] <- format(x$Prop[x$Prop != ""], justify = "right") + x$Prop[x$Prop != ""] <- format(x$Prop[x$Prop != ""], justify = "right") # nolint } - x[["N"]][x$Prop != ""] <- sprintf( + x[["N"]][x$Prop != ""] <- sprintf( # nolint "%s (%s)", - as.character(x[["N"]][x$Prop != ""]), - x$Prop[x$Prop != ""] + as.character(x[["N"]][x$Prop != ""]), # nolint + x$Prop[x$Prop != ""] # nolint ) x$Prop <- NULL } @@ -388,7 +388,7 @@ print_html.data_codebook <- function(x, # since we have each value at its own row, the HTML table contains # horizontal borders for each cell/row. We want to remove those borders # from rows that actually belong to one variable - separator_lines <- which(duplicated(x$.row_id) & x$N == "") + separator_lines <- which(duplicated(x$.row_id) & x$N == "") # nolint # remove separator lines, as we don't need these for HTML tables x <- x[-separator_lines, ] # check row IDs, and find odd rows @@ -405,7 +405,7 @@ print_html.data_codebook <- function(x, out <- gt::tab_style( out, style = list(gt::cell_borders(sides = "top", style = "hidden")), - locations = gt::cells_body(rows = which(x$ID == "")) + locations = gt::cells_body(rows = which(x$ID == "")) # nolint ) # highlight odd rows if (!is.null(row_color)) { @@ -466,5 +466,5 @@ print_md.data_codebook <- function(x, ...) { N = "r" ) align <- align[colnames(x)] - paste0(unname(align), collapse = "") + paste(unname(align), collapse = "") } diff --git a/R/data_partition.R b/R/data_partition.R index 5953480c5..09add9dd7 100644 --- a/R/data_partition.R +++ b/R/data_partition.R @@ -2,19 +2,20 @@ #' #' Creates data partitions (for instance, a training and a test set) based on a #' data frame that can also be stratified (i.e., evenly spread a given factor) -#' using the `group` argument. +#' using the `by` argument. #' #' @inheritParams data_rename #' @param proportion Scalar (between 0 and 1) or numeric vector, indicating the #' proportion(s) of the training set(s). The sum of `proportion` must not be #' greater than 1. The remaining part will be used for the test set. -#' @param group A character vector indicating the name(s) of the column(s) used +#' @param by A character vector indicating the name(s) of the column(s) used #' for stratified partitioning. #' @param seed A random number generator seed. Enter an integer (e.g. 123) so #' that the random sampling will be the same each time you run the function. #' @param row_id Character string, indicating the name of the column that #' contains the row-id's. #' @param verbose Toggle messages and warnings. +#' @param group Deprecated. Use `by` instead. #' #' @return A list of data frames. The list includes one training set per given #' proportion and the remaining data as test set. List elements of training @@ -28,7 +29,7 @@ #' nrow(out$p_0.9) #' #' # Stratify by group (equal proportions of each species) -#' out <- data_partition(iris, proportion = 0.9, group = "Species") +#' out <- data_partition(iris, proportion = 0.9, by = "Species") #' out$test #' #' # Create multiple partitions @@ -38,21 +39,28 @@ #' # Create multiple partitions, stratified by group - 30% equally sampled #' # from species in first training set, 50% in second training set and #' # remaining 20% equally sampled from each species in test set. -#' out <- data_partition(iris, proportion = c(0.3, 0.5), group = "Species") +#' out <- data_partition(iris, proportion = c(0.3, 0.5), by = "Species") #' lapply(out, function(i) table(i$Species)) #' #' @inherit data_rename seealso #' @export data_partition <- function(data, proportion = 0.7, - group = NULL, + by = NULL, seed = NULL, row_id = ".row_id", verbose = TRUE, + group = NULL, ...) { # validation checks data <- .coerce_to_dataframe(data) + ## TODO: remove warning in future release + if (!is.null(group)) { + by <- group + insight::format_warning("Argument `group` is deprecated and will be removed in a future release. Please use `by` instead.") # nolint + } + if (sum(proportion) > 1) { insight::format_error("Sum of `proportion` cannot be higher than 1.") } @@ -91,12 +99,12 @@ data_partition <- function(data, # Create list of data groups. We generally lapply over list of # sampled row-id's by group, thus, we even create a list if not grouped. - if (is.null(group)) { + if (is.null(by)) { indices_list <- list(seq_len(nrow(data))) } else { # else, split by group(s) and extract row-ids per group indices_list <- lapply( - split(data, data[group]), + split(data, data[by]), data_extract, select = row_id, as_data_frame = FALSE @@ -130,7 +138,7 @@ data_partition <- function(data, }) # we need to move all list elements one level higher. - if (is.null(group)) { + if (is.null(by)) { training_sets <- training_sets[[1]] } else { # for grouped training sets, we need to row-bind all sampled training diff --git a/R/data_xtabulate.R b/R/data_xtabulate.R index 0e38c9c07..3cb25d62b 100644 --- a/R/data_xtabulate.R +++ b/R/data_xtabulate.R @@ -234,7 +234,7 @@ print_html.dw_data_xtabulate <- function(x, big_mark = NULL, ...) { format(x, big_mark = big_mark, format = "html", ...), missing = "(NA)", format = "html", - group_by = "groups" + by = "groups" ) } @@ -270,7 +270,7 @@ print_html.dw_data_xtabulates <- function(x, big_mark = NULL, ...) { out, missing = "(NA)", format = "html", - group_by = "groups" + by = "groups" ) } } diff --git a/R/demean.R b/R/demean.R index 69e833a4d..bbf7d2dfc 100644 --- a/R/demean.R +++ b/R/demean.R @@ -11,7 +11,7 @@ #' @param x A data frame. #' @param select Character vector (or formula) with names of variables to select #' that should be group- and de-meaned. -#' @param group Character vector (or formula) with the name of the variable that +#' @param by Character vector (or formula) with the name of the variable that #' indicates the group- or cluster-ID. #' @param center Method for centering. `demean()` always performs #' mean-centering, while `degroup()` can use `center = "median"` or @@ -25,6 +25,7 @@ #' attributes to indicate the within- and between-effects. This is only #' relevant when printing `model_parameters()` - in such cases, the #' within- and between-effects are printed in separated blocks. +#' @param group Deprecated. Use `by` instead. #' @inheritParams center #' #' @return @@ -92,7 +93,7 @@ #' #' \subsection{Terminology}{ #' The group-meaned variable is simply the mean of an independent variable -#' within each group (or id-level or cluster) represented by `group`. +#' within each group (or id-level or cluster) represented by `by`. #' It represents the cluster-mean of an independent variable. The regression #' coefficient of a group-meaned variable is the *between-subject-effect*. #' The de-meaned variable is then the centered version of the group-meaned @@ -199,10 +200,10 @@ #' iris$ID <- sample(1:4, nrow(iris), replace = TRUE) # fake-ID #' iris$binary <- as.factor(rbinom(150, 1, .35)) # binary variable #' -#' x <- demean(iris, select = c("Sepal.Length", "Petal.Length"), group = "ID") +#' x <- demean(iris, select = c("Sepal.Length", "Petal.Length"), by = "ID") #' head(x) #' -#' x <- demean(iris, select = c("Sepal.Length", "binary", "Species"), group = "ID") +#' x <- demean(iris, select = c("Sepal.Length", "binary", "Species"), by = "ID") #' head(x) #' #' @@ -213,23 +214,30 @@ #' y = c(1, 2, 1, 2, 4, 3, 2, 1), #' ID = c(1, 2, 3, 1, 2, 3, 1, 2) #' ) -#' demean(dat, select = c("a", "x*y"), group = "ID") +#' demean(dat, select = c("a", "x*y"), by = "ID") #' #' # or in formula-notation -#' demean(dat, select = ~ a + x * y, group = ~ID) +#' demean(dat, select = ~ a + x * y, by = ~ID) #' #' @export demean <- function(x, select, - group, + by, suffix_demean = "_within", suffix_groupmean = "_between", add_attributes = TRUE, - verbose = TRUE) { + verbose = TRUE, + group = NULL) { + ## TODO: remove warning in future release + if (!is.null(group)) { + by <- group + insight::format_warning("Argument `group` is deprecated and will be removed in a future release. Please use `by` instead.") # nolint + } + degroup( x = x, select = select, - group = group, + by = by, center = "mean", suffix_demean = suffix_demean, suffix_groupmean = suffix_groupmean, @@ -247,12 +255,19 @@ demean <- function(x, #' @export degroup <- function(x, select, - group, + by, center = "mean", suffix_demean = "_within", suffix_groupmean = "_between", add_attributes = TRUE, - verbose = TRUE) { + verbose = TRUE, + group = NULL) { + ## TODO: remove warning later + if (!is.null(group)) { + by <- group + insight::format_warning("Argument `group` is deprecated and will be removed in a future release. Please use `by` instead.") # nolint + } + # ugly tibbles again... x <- .coerce_to_dataframe(x) @@ -266,8 +281,8 @@ degroup <- function(x, )) } - if (inherits(group, "formula")) { - group <- all.vars(group) + if (inherits(by, "formula")) { + by <- all.vars(by) } interactions_no <- select[!grepl("(\\*|\\:)", select)] @@ -296,7 +311,7 @@ degroup <- function(x, select <- intersect(colnames(x), select) # get data to demean... - dat <- x[, c(select, group)] + dat <- x[, c(select, by)] # find categorical predictors that are coded as factors @@ -344,31 +359,18 @@ degroup <- function(x, # for variables within each group (the group means). assign # mean values to a vector of same length as the data - if (center == "mode") { - x_gm_list <- lapply(select, function(i) { - stats::ave(dat[[i]], dat[[group]], FUN = function(.gm) distribution_mode(stats::na.omit(.gm))) - }) - } else if (center == "median") { - x_gm_list <- lapply(select, function(i) { - stats::ave(dat[[i]], dat[[group]], FUN = function(.gm) stats::median(.gm, na.rm = TRUE)) - }) - } else if (center == "min") { - x_gm_list <- lapply(select, function(i) { - stats::ave(dat[[i]], dat[[group]], FUN = function(.gm) min(.gm, na.rm = TRUE)) - }) - } else if (center == "max") { - x_gm_list <- lapply(select, function(i) { - stats::ave(dat[[i]], dat[[group]], FUN = function(.gm) max(.gm, na.rm = TRUE)) - }) - } else { - x_gm_list <- lapply(select, function(i) { - stats::ave(dat[[i]], dat[[group]], FUN = function(.gm) mean(.gm, na.rm = TRUE)) - }) - } - + gm_fun <- switch(center, + mode = function(.gm) distribution_mode(stats::na.omit(.gm)), + median = function(.gm) stats::median(.gm, na.rm = TRUE), + min = function(.gm) min(.gm, na.rm = TRUE), + max = function(.gm) max(.gm, na.rm = TRUE), + function(.gm) mean(.gm, na.rm = TRUE) + ) + x_gm_list <- lapply(select, function(i) { + stats::ave(dat[[i]], dat[[by]], FUN = gm_fun) + }) names(x_gm_list) <- select - # create de-meaned variables by subtracting the group mean from each individual value x_dm_list <- lapply(select, function(i) dat[[i]] - x_gm_list[[i]]) diff --git a/R/means_by_group.R b/R/means_by_group.R index faa73eba6..ad188f275 100644 --- a/R/means_by_group.R +++ b/R/means_by_group.R @@ -4,10 +4,10 @@ #' @description Computes summary table of means by groups. #' #' @param x A vector or a data frame. -#' @param group If `x` is a numeric vector, `group` should be a factor that -#' indicates the group-classifying categories. If `x` is a data frame, `group` +#' @param by If `x` is a numeric vector, `by` should be a factor that +#' indicates the group-classifying categories. If `x` is a data frame, `by` #' should be a character string, naming the variable in `x` that is used for -#' grouping. Numeric vectors are coerced to factors. Not that `group` should +#' grouping. Numeric vectors are coerced to factors. Not that `by` should #' only refer to a single variable. #' @param ci Level of confidence interval for mean estimates. Default is `0.95`. #' Use `ci = NA` to suppress confidence intervals. @@ -19,14 +19,15 @@ #' @param digits Optional scalar, indicating the amount of digits after decimal #' point when rounding estimates and values. #' @param ... Currently not used +#' @param group Deprecated. Use `by` instead. #' @inheritParams find_columns #' #' @return A data frame with information on mean and further summary statistics #' for each sub-group. #' -#' @details This function is comparable to `aggregate(x, group, mean)`, but provides +#' @details This function is comparable to `aggregate(x, by, mean)`, but provides #' some further information, including summary statistics from a One-Way-ANOVA -#' using `x` as dependent and `group` as independent variable. [`emmeans::contrast()`] +#' using `x` as dependent and `by` as independent variable. [`emmeans::contrast()`] #' is used to get p-values for each sub-group. P-values indicate whether each #' group-mean is significantly different from the total mean. #' @@ -55,21 +56,28 @@ means_by_group.default <- function(x, ...) { #' @rdname means_by_group #' @export means_by_group.numeric <- function(x, - group = NULL, + by = NULL, ci = 0.95, weights = NULL, digits = NULL, + group = NULL, ...) { + ## TODO: remove warning in future release + if (!is.null(group)) { + by <- group + insight::format_warning("Argument `group` is deprecated and will be removed in a future release. Please use `by` instead.") # nolint + } + # validation check for arguments - # "group" must be provided - if (is.null(group)) { - insight::format_error("Argument `group` is missing.") + # "by" must be provided + if (is.null(by)) { + insight::format_error("Argument `by` is missing.") } - # group must be of same length as x - if (length(group) != length(x)) { - insight::format_error("Argument `group` must be of same length as `x`.") + # by must be of same length as x + if (length(by) != length(x)) { + insight::format_error("Argument `by` must be of same length as `x`.") } # if weights are provided, must be of same length as x @@ -82,32 +90,32 @@ means_by_group.numeric <- function(x, # retrieve labels var_mean_label <- attr(x, "label", exact = TRUE) - var_grp_label <- attr(group, "label", exact = TRUE) + var_grp_label <- attr(by, "label", exact = TRUE) # if no labels present, use variable names directly if (is.null(var_mean_label)) { var_mean_label <- deparse(substitute(x)) } if (is.null(var_grp_label)) { - var_grp_label <- deparse(substitute(group)) + var_grp_label <- deparse(substitute(by)) } # coerce group to factor if numeric, or convert labels to levels, if factor - if (is.factor(group)) { - group <- tryCatch(labels_to_levels(group, verbose = FALSE), error = function(e) group) + if (is.factor(by)) { + by <- tryCatch(labels_to_levels(by, verbose = FALSE), error = function(e) by) } else { - group <- to_factor(group) + by <- to_factor(by) } - data <- stats::na.omit(data.frame( + my_data <- stats::na.omit(data.frame( x = x, - group = group, + group = by, weights = weights, stringsAsFactors = FALSE )) # get grouped means table - out <- .means_by_group(data, ci = ci) + out <- .means_by_group(my_data, ci = ci) # attributes attr(out, "var_mean_label") <- var_mean_label @@ -123,7 +131,7 @@ means_by_group.numeric <- function(x, #' @export means_by_group.data.frame <- function(x, select = NULL, - group = NULL, + by = NULL, ci = 0.95, weights = NULL, digits = NULL, @@ -131,7 +139,14 @@ means_by_group.data.frame <- function(x, ignore_case = FALSE, regex = FALSE, verbose = TRUE, + group = NULL, ...) { + ## TODO: remove warning in future release + if (!is.null(group)) { + by <- group + insight::format_warning("Argument `group` is deprecated and will be removed in a future release. Please use `by` instead.") # nolint + } + # evaluate select/exclude, may be select-helpers select <- .select_nse(select, x, @@ -154,11 +169,11 @@ means_by_group.data.frame <- function(x, if (is.null(attr(x[[i]], "label", exact = TRUE))) { attr(x[[i]], "label") <- i } - if (is.null(attr(x[[group]], "label", exact = TRUE))) { - attr(x[[group]], "label") <- group + if (is.null(attr(x[[by]], "label", exact = TRUE))) { + attr(x[[by]], "label") <- by } # compute means table - means_by_group(x[[i]], group = x[[group]], ci = ci, weights = w, digits = digits, ...) + means_by_group(x[[i]], by = x[[by]], ci = ci, weights = w, digits = digits, ...) }) class(out) <- c("dw_groupmeans_list", "list") @@ -195,14 +210,14 @@ means_by_group.data.frame <- function(x, if (insight::check_if_installed("emmeans", quietly = TRUE)) { # create summary table of contrasts, for p-values and confidence intervals predicted <- emmeans::emmeans(fit, specs = "group", level = ci) - contrasts <- emmeans::contrast(predicted, method = "eff") + emm_contrasts <- emmeans::contrast(predicted, method = "eff") # add p-values and confidence intervals to "out" if (!is.null(ci) && !is.na(ci)) { summary_table <- as.data.frame(predicted) out$CI_low <- summary_table$lower.CL out$CI_high <- summary_table$upper.CL } - summary_table <- as.data.frame(contrasts) + summary_table <- as.data.frame(emm_contrasts) out$p <- summary_table$p.value } diff --git a/R/recode_values.R b/R/recode_values.R index e0bb9540f..e355e9cb0 100644 --- a/R/recode_values.R +++ b/R/recode_values.R @@ -476,7 +476,7 @@ recode_values.data.frame <- function(x, # create the new variables and updates "select", so new variables are processed if (!isFALSE(append)) { # process arguments - args <- .process_append( + my_args <- .process_append( x, select, append, @@ -484,8 +484,8 @@ recode_values.data.frame <- function(x, preserve_value_labels = TRUE ) # update processed arguments - x <- args$x - select <- args$select + x <- my_args$x + select <- my_args$select } x[select] <- lapply( diff --git a/R/rescale_weights.R b/R/rescale_weights.R index 6f82acdad..02aab1d2e 100644 --- a/R/rescale_weights.R +++ b/R/rescale_weights.R @@ -10,16 +10,17 @@ #' models, which then can be used for multilevel modelling. #' #' @param data A data frame. -#' @param group Variable names (as character vector, or as formula), indicating +#' @param by Variable names (as character vector, or as formula), indicating #' the grouping structure (strata) of the survey data (level-2-cluster #' variable). It is also possible to create weights for multiple group #' variables; in such cases, each created weighting variable will be suffixed #' by the name of the group variable. #' @param probability_weights Variable indicating the probability (design or #' sampling) weights of the survey data (level-1-weight). -#' @param nest Logical, if `TRUE` and `group` indicates at least two +#' @param nest Logical, if `TRUE` and `by` indicates at least two #' group variables, then groups are "nested", i.e. groups are now a -#' combination from each group level of the variables in `group`. +#' combination from each group level of the variables in `by`. +#' @param group Deprecated. Use `by` instead. #' #' @return `data`, including the new weighting variables: `pweights_a` #' and `pweights_b`, which represent the rescaled design weights to use @@ -71,7 +72,7 @@ #' # or nested structures. #' x <- rescale_weights( #' data = nhanes_sample, -#' group = c("SDMVSTRA", "SDMVPSU"), +#' by = c("SDMVSTRA", "SDMVPSU"), #' probability_weights = "WTINT2YR", #' nest = TRUE #' ) @@ -87,9 +88,15 @@ #' ) #' } #' @export -rescale_weights <- function(data, group, probability_weights, nest = FALSE) { - if (inherits(group, "formula")) { - group <- all.vars(group) +rescale_weights <- function(data, by, probability_weights, nest = FALSE, group = NULL) { + ## TODO: remove warning in future release + if (!is.null(group)) { + by <- group + insight::format_warning("Argument `group` is deprecated and will be removed in a future release. Please use `by` instead.") # nolint + } + + if (inherits(by, "formula")) { + by <- all.vars(by) } # check if weight has missings. we need to remove them first, @@ -107,22 +114,22 @@ rescale_weights <- function(data, group, probability_weights, nest = FALSE) { # sort id data_tmp$.bamboozled <- seq_len(nrow(data_tmp)) - if (nest && length(group) < 2) { + if (nest && length(by) < 2) { insight::format_warning( sprintf( - "Only one group variable selected, no nested structure possible. Rescaling weights for grout '%s' now.", - group + "Only one group variable selected in `by`, no nested structure possible. Rescaling weights for grout '%s' now.", + by ) ) nest <- FALSE } if (nest) { - out <- .rescale_weights_nested(data_tmp, group, probability_weights, nrow(data), weight_non_na) + out <- .rescale_weights_nested(data_tmp, group = by, probability_weights, nrow(data), weight_non_na) } else { - out <- lapply(group, function(i) { + out <- lapply(by, function(i) { x <- .rescale_weights(data_tmp, i, probability_weights, nrow(data), weight_non_na) - if (length(group) > 1) { + if (length(by) > 1) { colnames(x) <- sprintf(c("pweight_a_%s", "pweight_b_%s"), i) } x diff --git a/R/select_nse.R b/R/select_nse.R index 118d40b15..1578a9fa1 100644 --- a/R/select_nse.R +++ b/R/select_nse.R @@ -148,7 +148,8 @@ # 3 types of symbols: # - unquoted variables -# - objects that need to be evaluated, e.g data_find(iris, i) where i is a +# - objects that need to be evaluated, e.g data_find(iris, i) where +# i is a # function arg or is defined before. This can also be a vector of names or # positions. # - functions (without parenthesis) @@ -180,24 +181,22 @@ # if starts_with() et al. come from tidyselect but need to be used in # a select environment, then the error doesn't have the same structure. - if (is.null(fn) && - grepl("must be used within a", e$message, fixed = TRUE)) { - trace <- lapply(e$trace$call, function(x) { + if (is.null(fn) && grepl("must be used within a", e$message, fixed = TRUE)) { + call_trace <- lapply(e$trace$call, function(x) { tmp <- insight::safe_deparse(x) if (grepl(paste0("^", .regex_select_helper()), tmp)) { tmp } }) - fn <- Filter(Negate(is.null), trace)[1] + fn <- Filter(Negate(is.null), call_trace)[1] } # if we actually obtain the select helper call, return it, else return # what we already had if (length(fn) > 0L && grepl(.regex_select_helper(), fn)) { is_select_helper <<- TRUE return(fn) - } else { - NULL } + NULL } ) @@ -249,7 +248,7 @@ switch(type, `:` = .select_seq(x, data, ignore_case, regex, verbose), `-` = .select_minus(x, data, ignore_case, regex, verbose), - `c` = .select_c(x, data, ignore_case, regex, verbose), + `c` = .select_c(x, data, ignore_case, regex, verbose), # nolint `(` = .select_bracket(x, data, ignore_case, regex, verbose), `[` = .select_square_bracket(x, data, ignore_case, regex, verbose), `$` = .select_dollar(x, data, ignore_case, regex, verbose), @@ -494,7 +493,7 @@ # Almost identical to dynGet(). The difference is that we deparse the expression # because get0() allows symbol only since R 4.1.0 .dynGet <- function(x, - ifnotfound = stop(gettextf("%s not found", sQuote(x)), domain = NA), + ifnotfound = stop(gettextf("%s not found", sQuote(x)), domain = NA, call. = FALSE), minframe = 1L, inherits = FALSE) { x <- insight::safe_deparse(x) @@ -518,7 +517,7 @@ # Custom arg "remove_n_top_env" to remove the first environments which are # ".select_nse()" and the other custom functions .dynEval <- function(x, - ifnotfound = stop(gettextf("%s not found", sQuote(x)), domain = NA), + ifnotfound = stop(gettextf("%s not found", sQuote(x)), domain = NA, call. = FALSE), minframe = 1L, inherits = FALSE, remove_n_top_env = 0) { diff --git a/R/skewness_kurtosis.R b/R/skewness_kurtosis.R index e0da83c54..6142c59ad 100644 --- a/R/skewness_kurtosis.R +++ b/R/skewness_kurtosis.R @@ -115,6 +115,7 @@ skewness.numeric <- function(x, # TODO: remove deprecated argument later if (!missing(na.rm)) { # TODO: add deprecation warning in a later update + insight::format_warning("Argument `na.rm` is deprecated and will be removed in a future release. Please use `remove_na` instead.") # nolint remove_na <- na.rm } @@ -148,9 +149,7 @@ skewness.numeric <- function(x, ) if (!is.null(iterations)) { - if (!requireNamespace("boot", quietly = TRUE)) { - insight::format_warning("Package 'boot' needed for bootstrapping SEs.") - } else { + if (requireNamespace("boot", quietly = TRUE)) { results <- boot::boot( data = x, statistic = .boot_skewness, @@ -159,6 +158,8 @@ skewness.numeric <- function(x, type = type ) out_se <- stats::sd(results$t, na.rm = TRUE) + } else { + insight::format_warning("Package 'boot' needed for bootstrapping SEs.") } } @@ -181,6 +182,7 @@ skewness.matrix <- function(x, # TODO: remove deprecated argument later if (!missing(na.rm)) { # TODO: add deprecation warning in a later update + insight::format_warning("Argument `na.rm` is deprecated and will be removed in a future release. Please use `remove_na` instead.") # nolint remove_na <- na.rm } @@ -216,6 +218,7 @@ skewness.data.frame <- function(x, # TODO: remove deprecated argument later if (!missing(na.rm)) { # TODO: add deprecation warning in a later update + insight::format_warning("Argument `na.rm` is deprecated and will be removed in a future release. Please use `remove_na` instead.") # nolint remove_na <- na.rm } @@ -243,6 +246,7 @@ skewness.default <- function(x, # TODO: remove deprecated argument later if (!missing(na.rm)) { # TODO: add deprecation warning in a later update + insight::format_warning("Argument `na.rm` is deprecated and will be removed in a future release. Please use `remove_na` instead.") # nolint remove_na <- na.rm } @@ -278,6 +282,7 @@ kurtosis.numeric <- function(x, # TODO: remove deprecated argument later if (!missing(na.rm)) { # TODO: add deprecation warning in a later update + insight::format_warning("Argument `na.rm` is deprecated and will be removed in a future release. Please use `remove_na` instead.") # nolint remove_na <- na.rm } @@ -342,6 +347,7 @@ kurtosis.matrix <- function(x, # TODO: remove deprecated argument later if (!missing(na.rm)) { # TODO: add deprecation warning in a later update + insight::format_warning("Argument `na.rm` is deprecated and will be removed in a future release. Please use `remove_na` instead.") # nolint remove_na <- na.rm } @@ -373,6 +379,7 @@ kurtosis.data.frame <- function(x, # TODO: remove deprecated argument later if (!missing(na.rm)) { # TODO: add deprecation warning in a later update + insight::format_warning("Argument `na.rm` is deprecated and will be removed in a future release. Please use `remove_na` instead.") # nolint remove_na <- na.rm } @@ -398,6 +405,7 @@ kurtosis.default <- function(x, # TODO: remove deprecated argument later if (!missing(na.rm)) { # TODO: add deprecation warning in a later update + insight::format_warning("Argument `na.rm` is deprecated and will be removed in a future release. Please use `remove_na` instead.") # nolint remove_na <- na.rm } diff --git a/R/text_format.R b/R/text_format.R index ec70e87da..b15935542 100644 --- a/R/text_format.R +++ b/R/text_format.R @@ -68,18 +68,18 @@ text_lastchar <- function(text, n = 1) { #' @rdname text_format #' @export text_concatenate <- function(text, sep = ", ", last = " and ", enclose = NULL) { - if (length(text) == 1 && nchar(text) == 0) { + if (length(text) == 1 && !nzchar(text, keepNA = TRUE)) { return(text) } - text <- text[text != ""] - if (length(text) && !is.null(enclose) && length(enclose) == 1 && nchar(enclose) > 0) { + text <- text[text != ""] # nolint + if (length(text) && !is.null(enclose) && length(enclose) == 1 && nzchar(enclose, keepNA = TRUE)) { text <- paste0(enclose, text, enclose) } if (length(text) == 1) { s <- text } else { - s <- paste0(text[1:(length(text) - 1)], collapse = sep) - s <- paste0(c(s, text[length(text)]), collapse = last) + s <- paste(text[1:(length(text) - 1)], collapse = sep) + s <- paste(c(s, text[length(text)]), collapse = last) } s } @@ -89,7 +89,7 @@ text_concatenate <- function(text, sep = ", ", last = " and ", enclose = NULL) { #' @export text_paste <- function(text, text2 = NULL, sep = ", ", enclose = NULL, ...) { if (!is.null(text2)) { - if (!is.null(enclose) && length(enclose) == 1 && nchar(enclose) > 0) { + if (!is.null(enclose) && length(enclose) == 1 && nzchar(enclose, keepNA = TRUE)) { text <- vapply(text, function(i) { if (i != "") { i <- paste0(enclose, i, enclose) @@ -103,7 +103,7 @@ text_paste <- function(text, text2 = NULL, sep = ", ", enclose = NULL, ...) { i }, character(1L)) } - paste0(text, ifelse(text == "" | text2 == "", "", sep), text2) + paste0(text, ifelse(text == "" | text2 == "", "", sep), text2) # nolint } } @@ -130,7 +130,7 @@ text_wrap <- function(text, width = NULL, ...) { if (nchar(s) > width) { leading_spaces <- nchar(s) - nchar(insight::trim_ws(s)) s <- strwrap(s, width = width) - s <- paste0(s, collapse = "\n") + s <- paste(s, collapse = "\n") s <- paste0(strrep(" ", leading_spaces), s) } wrapped <- paste0(wrapped, s, "\n") diff --git a/README.Rmd b/README.Rmd index 6a696194c..35de33f56 100644 --- a/README.Rmd +++ b/README.Rmd @@ -314,7 +314,7 @@ iris |> # all rows where Species is "versicolor" or "virginica" data_filter(Species %in% c("versicolor", "virginica")) |> # select only columns with "." in names (i.e. drop Species) - get_columns(contains("\\.")) |> + data_select(contains("\\.")) |> # move columns that ends with "Length" to start of data frame data_relocate(ends_with("Length")) |> # remove fourth column diff --git a/README.md b/README.md index 0d459a9cf..b164c693f 100644 --- a/README.md +++ b/README.md @@ -584,7 +584,7 @@ iris |> # all rows where Species is "versicolor" or "virginica" data_filter(Species %in% c("versicolor", "virginica")) |> # select only columns with "." in names (i.e. drop Species) - get_columns(contains("\\.")) |> + data_select(contains("\\.")) |> # move columns that ends with "Length" to start of data frame data_relocate(ends_with("Length")) |> # remove fourth column diff --git a/man/data_partition.Rd b/man/data_partition.Rd index 8e7cae95b..4ed71e3c1 100644 --- a/man/data_partition.Rd +++ b/man/data_partition.Rd @@ -7,10 +7,11 @@ data_partition( data, proportion = 0.7, - group = NULL, + by = NULL, seed = NULL, row_id = ".row_id", verbose = TRUE, + group = NULL, ... ) } @@ -21,7 +22,7 @@ data_partition( proportion(s) of the training set(s). The sum of \code{proportion} must not be greater than 1. The remaining part will be used for the test set.} -\item{group}{A character vector indicating the name(s) of the column(s) used +\item{by}{A character vector indicating the name(s) of the column(s) used for stratified partitioning.} \item{seed}{A random number generator seed. Enter an integer (e.g. 123) so @@ -32,6 +33,8 @@ contains the row-id's.} \item{verbose}{Toggle messages and warnings.} +\item{group}{Deprecated. Use \code{by} instead.} + \item{...}{Other arguments passed to or from other functions.} } \value{ @@ -43,7 +46,7 @@ is named \verb{$test}. \description{ Creates data partitions (for instance, a training and a test set) based on a data frame that can also be stratified (i.e., evenly spread a given factor) -using the \code{group} argument. +using the \code{by} argument. } \examples{ data(iris) @@ -52,7 +55,7 @@ out$test nrow(out$p_0.9) # Stratify by group (equal proportions of each species) -out <- data_partition(iris, proportion = 0.9, group = "Species") +out <- data_partition(iris, proportion = 0.9, by = "Species") out$test # Create multiple partitions @@ -62,7 +65,7 @@ lapply(out, head) # Create multiple partitions, stratified by group - 30\% equally sampled # from species in first training set, 50\% in second training set and # remaining 20\% equally sampled from each species in test set. -out <- data_partition(iris, proportion = c(0.3, 0.5), group = "Species") +out <- data_partition(iris, proportion = c(0.3, 0.5), by = "Species") lapply(out, function(i) table(i$Species)) } diff --git a/man/demean.Rd b/man/demean.Rd index 422c8d32e..d03a1010b 100644 --- a/man/demean.Rd +++ b/man/demean.Rd @@ -9,33 +9,36 @@ demean( x, select, - group, + by, suffix_demean = "_within", suffix_groupmean = "_between", add_attributes = TRUE, - verbose = TRUE + verbose = TRUE, + group = NULL ) degroup( x, select, - group, + by, center = "mean", suffix_demean = "_within", suffix_groupmean = "_between", add_attributes = TRUE, - verbose = TRUE + verbose = TRUE, + group = NULL ) detrend( x, select, - group, + by, center = "mean", suffix_demean = "_within", suffix_groupmean = "_between", add_attributes = TRUE, - verbose = TRUE + verbose = TRUE, + group = NULL ) } \arguments{ @@ -44,7 +47,7 @@ detrend( \item{select}{Character vector (or formula) with names of variables to select that should be group- and de-meaned.} -\item{group}{Character vector (or formula) with the name of the variable that +\item{by}{Character vector (or formula) with the name of the variable that indicates the group- or cluster-ID.} \item{suffix_demean, suffix_groupmean}{String value, will be appended to the @@ -59,6 +62,8 @@ within- and between-effects are printed in separated blocks.} \item{verbose}{Toggle warnings and messages.} +\item{group}{Deprecated. Use \code{by} instead.} + \item{center}{Method for centering. \code{demean()} always performs mean-centering, while \code{degroup()} can use \code{center = "median"} or \code{center = "mode"} for median- or mode-centering, and also \code{"min"} @@ -131,7 +136,7 @@ intervals and low statistical power} (\cite{Heisig et al. 2017}). \subsection{Terminology}{ The group-meaned variable is simply the mean of an independent variable -within each group (or id-level or cluster) represented by \code{group}. +within each group (or id-level or cluster) represented by \code{by}. It represents the cluster-mean of an independent variable. The regression coefficient of a group-meaned variable is the \emph{between-subject-effect}. The de-meaned variable is then the centered version of the group-meaned @@ -209,10 +214,10 @@ data(iris) iris$ID <- sample(1:4, nrow(iris), replace = TRUE) # fake-ID iris$binary <- as.factor(rbinom(150, 1, .35)) # binary variable -x <- demean(iris, select = c("Sepal.Length", "Petal.Length"), group = "ID") +x <- demean(iris, select = c("Sepal.Length", "Petal.Length"), by = "ID") head(x) -x <- demean(iris, select = c("Sepal.Length", "binary", "Species"), group = "ID") +x <- demean(iris, select = c("Sepal.Length", "binary", "Species"), by = "ID") head(x) @@ -223,10 +228,10 @@ dat <- data.frame( y = c(1, 2, 1, 2, 4, 3, 2, 1), ID = c(1, 2, 3, 1, 2, 3, 1, 2) ) -demean(dat, select = c("a", "x*y"), group = "ID") +demean(dat, select = c("a", "x*y"), by = "ID") # or in formula-notation -demean(dat, select = ~ a + x * y, group = ~ID) +demean(dat, select = ~ a + x * y, by = ~ID) } \references{ diff --git a/man/means_by_group.Rd b/man/means_by_group.Rd index 9434452ad..5473daeec 100644 --- a/man/means_by_group.Rd +++ b/man/means_by_group.Rd @@ -8,12 +8,20 @@ \usage{ means_by_group(x, ...) -\method{means_by_group}{numeric}(x, group = NULL, ci = 0.95, weights = NULL, digits = NULL, ...) +\method{means_by_group}{numeric}( + x, + by = NULL, + ci = 0.95, + weights = NULL, + digits = NULL, + group = NULL, + ... +) \method{means_by_group}{data.frame}( x, select = NULL, - group = NULL, + by = NULL, ci = 0.95, weights = NULL, digits = NULL, @@ -21,6 +29,7 @@ means_by_group(x, ...) ignore_case = FALSE, regex = FALSE, verbose = TRUE, + group = NULL, ... ) } @@ -29,10 +38,10 @@ means_by_group(x, ...) \item{...}{Currently not used} -\item{group}{If \code{x} is a numeric vector, \code{group} should be a factor that -indicates the group-classifying categories. If \code{x} is a data frame, \code{group} +\item{by}{If \code{x} is a numeric vector, \code{by} should be a factor that +indicates the group-classifying categories. If \code{x} is a data frame, \code{by} should be a character string, naming the variable in \code{x} that is used for -grouping. Numeric vectors are coerced to factors. Not that \code{group} should +grouping. Numeric vectors are coerced to factors. Not that \code{by} should only refer to a single variable.} \item{ci}{Level of confidence interval for mean estimates. Default is \code{0.95}. @@ -47,6 +56,8 @@ weights are used.} \item{digits}{Optional scalar, indicating the amount of digits after decimal point when rounding estimates and values.} +\item{group}{Deprecated. Use \code{by} instead.} + \item{select}{Variables that will be included when performing the required tasks. Can be either \itemize{ @@ -106,9 +117,9 @@ for each sub-group. Computes summary table of means by groups. } \details{ -This function is comparable to \code{aggregate(x, group, mean)}, but provides +This function is comparable to \code{aggregate(x, by, mean)}, but provides some further information, including summary statistics from a One-Way-ANOVA -using \code{x} as dependent and \code{group} as independent variable. \code{\link[emmeans:contrast]{emmeans::contrast()}} +using \code{x} as dependent and \code{by} as independent variable. \code{\link[emmeans:contrast]{emmeans::contrast()}} is used to get p-values for each sub-group. P-values indicate whether each group-mean is significantly different from the total mean. } diff --git a/man/rescale_weights.Rd b/man/rescale_weights.Rd index 4a005eb99..4a67d4100 100644 --- a/man/rescale_weights.Rd +++ b/man/rescale_weights.Rd @@ -4,12 +4,12 @@ \alias{rescale_weights} \title{Rescale design weights for multilevel analysis} \usage{ -rescale_weights(data, group, probability_weights, nest = FALSE) +rescale_weights(data, by, probability_weights, nest = FALSE, group = NULL) } \arguments{ \item{data}{A data frame.} -\item{group}{Variable names (as character vector, or as formula), indicating +\item{by}{Variable names (as character vector, or as formula), indicating the grouping structure (strata) of the survey data (level-2-cluster variable). It is also possible to create weights for multiple group variables; in such cases, each created weighting variable will be suffixed @@ -18,9 +18,11 @@ by the name of the group variable.} \item{probability_weights}{Variable indicating the probability (design or sampling) weights of the survey data (level-1-weight).} -\item{nest}{Logical, if \code{TRUE} and \code{group} indicates at least two +\item{nest}{Logical, if \code{TRUE} and \code{by} indicates at least two group variables, then groups are "nested", i.e. groups are now a -combination from each group level of the variables in \code{group}.} +combination from each group level of the variables in \code{by}.} + +\item{group}{Deprecated. Use \code{by} instead.} } \value{ \code{data}, including the new weighting variables: \code{pweights_a} @@ -73,7 +75,7 @@ if (require("lme4")) { # or nested structures. x <- rescale_weights( data = nhanes_sample, - group = c("SDMVSTRA", "SDMVPSU"), + by = c("SDMVSTRA", "SDMVPSU"), probability_weights = "WTINT2YR", nest = TRUE ) diff --git a/tests/testthat/_snaps/data_partition.md b/tests/testthat/_snaps/data_partition.md index 5cf3ffbf1..fdf76fc36 100644 --- a/tests/testthat/_snaps/data_partition.md +++ b/tests/testthat/_snaps/data_partition.md @@ -88,7 +88,7 @@ --- Code - str(data_partition(iris, proportion = 0.7, group = "Species", seed = 123)) + str(data_partition(iris, proportion = 0.7, by = "Species", seed = 123)) Output List of 2 $ p_0.7:'data.frame': 105 obs. of 6 variables: @@ -109,7 +109,7 @@ --- Code - str(data_partition(iris, proportion = c(0.2, 0.5), group = "Species", seed = 123)) + str(data_partition(iris, proportion = c(0.2, 0.5), by = "Species", seed = 123)) Output List of 3 $ p_0.2:'data.frame': 30 obs. of 6 variables: diff --git a/tests/testthat/_snaps/demean.md b/tests/testthat/_snaps/demean.md index f61ba9fcb..7f12d263d 100644 --- a/tests/testthat/_snaps/demean.md +++ b/tests/testthat/_snaps/demean.md @@ -55,7 +55,7 @@ # demean interaction term Code - demean(dat, select = c("a", "x*y"), group = "ID") + demean(dat, select = c("a", "x*y"), by = "ID") Output a_between x_y_between a_within x_y_within 1 2.666667 4.666667 -1.6666667 -0.6666667 diff --git a/tests/testthat/_snaps/rescale_weights.md b/tests/testthat/_snaps/rescale_weights.md index d158070a8..5de6d489a 100644 --- a/tests/testthat/_snaps/rescale_weights.md +++ b/tests/testthat/_snaps/rescale_weights.md @@ -34,7 +34,7 @@ # rescale_weights nested works as expected Code - rescale_weights(data = head(nhanes_sample, n = 30), group = c("SDMVSTRA", + rescale_weights(data = head(nhanes_sample, n = 30), by = c("SDMVSTRA", "SDMVPSU"), probability_weights = "WTINT2YR", nest = TRUE) Output total age RIAGENDR RIDRETH1 SDMVPSU SDMVSTRA WTINT2YR pweights_a diff --git a/tests/testthat/_snaps/text_format.md b/tests/testthat/_snaps/text_format.md index 78716bb34..6516c72af 100644 --- a/tests/testthat/_snaps/text_format.md +++ b/tests/testthat/_snaps/text_format.md @@ -1,7 +1,7 @@ # text formatting helpers work as expected Code - format_text(c("A very long First", "Some similar long Second", "Shorter Third", + text_format(c("A very long First", "Some similar long Second", "Shorter Third", "More or less long Fourth", "And finally the Last"), width = 20) Output [1] "A very long First,\nSome similar long\nSecond, Shorter\nThird, More or less\nlong Fourth and And\nfinally the Last\n" @@ -9,7 +9,7 @@ --- Code - format_text(c("A very long First", "Some similar long Second", "Shorter Third", + text_format(c("A very long First", "Some similar long Second", "Shorter Third", "More or less long Fourth", "And finally the Last"), last = " or ", enclose = "`", width = 20) Output @@ -19,7 +19,7 @@ Code long_text <- strrep("abc ", 100) - cat(format_text(long_text, width = 50)) + cat(text_format(long_text, width = 50)) Output abc abc abc abc abc abc abc abc abc abc abc abc abc abc abc abc abc abc abc abc abc abc abc abc @@ -31,7 +31,7 @@ abc abc abc abc abc abc abc abc abc abc abc abc abc abc abc abc Code - cat(format_text(long_text, width = 80)) + cat(text_format(long_text, width = 80)) Output abc abc abc abc abc abc abc abc abc abc abc abc abc abc abc abc abc abc abc abc abc abc abc abc abc abc abc abc abc abc abc abc abc abc abc abc abc abc abc abc @@ -40,7 +40,7 @@ abc abc abc abc abc abc abc abc abc abc abc abc abc abc abc abc abc abc abc abc Code withr::with_options(list(width = 50), code = { - cat(format_text(long_text)) + cat(text_format(long_text)) }) Output abc abc abc abc abc abc abc abc abc abc abc abc diff --git a/tests/testthat/test-attributes.R b/tests/testthat/test-attributes.R index 2fc88ecc9..ebd26de99 100644 --- a/tests/testthat/test-attributes.R +++ b/tests/testthat/test-attributes.R @@ -177,10 +177,10 @@ test_that("categorize, attributes preserved", { # change_code ----------------------------------- -test_that("change_code, attributes preserved", { +test_that("recode_values, attributes preserved", { x <- mtcars attr(x, "myattri") <- "I'm here" - x2 <- change_code(x, select = "am", recode = list(`5` = 0, `10` = 1)) + x2 <- recode_values(x, select = "am", recode = list(`5` = 0, `10` = 1)) expect_identical(attr(x2, "myattri", exact = TRUE), "I'm here") }) diff --git a/tests/testthat/test-data_partition.R b/tests/testthat/test-data_partition.R index 7443465a8..99f4535b2 100644 --- a/tests/testthat/test-data_partition.R +++ b/tests/testthat/test-data_partition.R @@ -53,8 +53,8 @@ test_that("data_partition works as expected", { data(iris) expect_snapshot(str(data_partition(iris, proportion = 0.7, seed = 123))) expect_snapshot(str(data_partition(iris, proportion = c(0.2, 0.5), seed = 123))) - expect_snapshot(str(data_partition(iris, proportion = 0.7, group = "Species", seed = 123))) - expect_snapshot(str(data_partition(iris, proportion = c(0.2, 0.5), group = "Species", seed = 123))) + expect_snapshot(str(data_partition(iris, proportion = 0.7, by = "Species", seed = 123))) + expect_snapshot(str(data_partition(iris, proportion = c(0.2, 0.5), by = "Species", seed = 123))) }) test_that("data_partition warns if no testing set", { diff --git a/tests/testthat/test-demean.R b/tests/testthat/test-demean.R index a2f803cd7..566bd6097 100644 --- a/tests/testthat/test-demean.R +++ b/tests/testthat/test-demean.R @@ -2,29 +2,35 @@ test_that("demean works", { df <- iris set.seed(123) - df$ID <- sample(1:4, nrow(df), replace = TRUE) # fake-ID + df$ID <- sample.int(4, nrow(df), replace = TRUE) # fake-ID set.seed(123) df$binary <- as.factor(rbinom(150, 1, 0.35)) # binary variable set.seed(123) - x <- demean(df, select = c("Sepal.Length", "Petal.Length"), group = "ID") + x <- demean(df, select = c("Sepal.Length", "Petal.Length"), by = "ID") expect_snapshot(head(x)) set.seed(123) expect_message( - x <- demean(df, select = c("Sepal.Length", "binary", "Species"), group = "ID"), + { + x <- demean(df, select = c("Sepal.Length", "binary", "Species"), by = "ID") + }, "have been coerced to numeric" ) expect_snapshot(head(x)) set.seed(123) expect_message( - y <- demean(df, select = ~ Sepal.Length + binary + Species, group = ~ID), + { + y <- demean(df, select = ~ Sepal.Length + binary + Species, by = ~ID) + }, "have been coerced to numeric" ) expect_message( - z <- demean(df, select = c("Sepal.Length", "binary", "Species"), group = "ID"), + { + z <- demean(df, select = c("Sepal.Length", "binary", "Species"), by = "ID") + }, "have been coerced to numeric" ) expect_identical(y, z) @@ -39,7 +45,7 @@ test_that("demean interaction term", { ) set.seed(123) - expect_snapshot(demean(dat, select = c("a", "x*y"), group = "ID")) + expect_snapshot(demean(dat, select = c("a", "x*y"), by = "ID")) }) test_that("demean shows message if some vars don't exist", { @@ -52,7 +58,7 @@ test_that("demean shows message if some vars don't exist", { set.seed(123) expect_message( - demean(dat, select = "foo", group = "ID"), + demean(dat, select = "foo", by = "ID"), regexp = "not found" ) }) diff --git a/tests/testthat/test-labelled_data.R b/tests/testthat/test-labelled_data.R index 0b7e37a4d..2e933e5dc 100644 --- a/tests/testthat/test-labelled_data.R +++ b/tests/testthat/test-labelled_data.R @@ -311,10 +311,10 @@ test_that("convert_to_na, labels preserved", { -# get_columns ----------------------------------- +# data_select ----------------------------------- -test_that("get_columns, labels preserved", { - x <- get_columns(efc, starts_with("c")) +test_that("data_select, labels preserved", { + x <- data_select(efc, starts_with("c")) # numeric expect_equal( attr(x$c12hour, "label", exact = TRUE), @@ -322,7 +322,7 @@ test_that("get_columns, labels preserved", { ignore_attr = TRUE ) - x <- get_columns(efc, starts_with("e")) + x <- data_select(efc, starts_with("e")) # factor expect_equal( attr(x$e42dep, "label", exact = TRUE), diff --git a/tests/testthat/test-recode_into.R b/tests/testthat/test-recode_into.R index b9b0d4da3..53c75d3a7 100644 --- a/tests/testthat/test-recode_into.R +++ b/tests/testthat/test-recode_into.R @@ -264,12 +264,14 @@ test_that("recode_into, NA doesn't need to be of exact type", { x1 <- recode_into( mpg > 10 ~ 1, gear == 5 ~ NA_real_, - data = mtcars + data = mtcars, + verbose = FALSE ) x2 <- recode_into( mpg > 10 ~ 1, gear == 5 ~ NA, - data = mtcars + data = mtcars, + verbose = FALSE ) expect_identical(x1, x2) }) diff --git a/tests/testthat/test-rescale_weights.R b/tests/testthat/test-rescale_weights.R index bcd279355..504157180 100644 --- a/tests/testthat/test-rescale_weights.R +++ b/tests/testthat/test-rescale_weights.R @@ -13,19 +13,21 @@ test_that("rescale_weights nested works as expected", { expect_snapshot( rescale_weights( data = head(nhanes_sample, n = 30), - group = c("SDMVSTRA", "SDMVPSU"), + by = c("SDMVSTRA", "SDMVPSU"), probability_weights = "WTINT2YR", nest = TRUE ) ) expect_warning( - x <- rescale_weights( - data = head(nhanes_sample), - group = "SDMVPSU", - probability_weights = "WTINT2YR", - nest = TRUE - ), + { + x <- rescale_weights( + data = head(nhanes_sample), + by = "SDMVPSU", + probability_weights = "WTINT2YR", + nest = TRUE + ) + }, "Only one group variable selected" ) @@ -33,7 +35,7 @@ test_that("rescale_weights nested works as expected", { x, rescale_weights( data = head(nhanes_sample), - group = "SDMVPSU", + by = "SDMVPSU", probability_weights = "WTINT2YR" ) ) diff --git a/tests/testthat/test-standardize_datagrid.R b/tests/testthat/test-standardize_datagrid.R index 511fe0045..35a11fdb5 100644 --- a/tests/testthat/test-standardize_datagrid.R +++ b/tests/testthat/test-standardize_datagrid.R @@ -1,11 +1,15 @@ # standardize ----------------------------------------------------- test_that("standardize.datagrid", { - x <- insight::get_datagrid(iris, at = "Sepal.Length", range = "sd", length = 3) + x <- insight::get_datagrid(iris, by = "Sepal.Length", range = "sd", length = 3) out <- standardize(x) - expect_equal(as.numeric(out$Sepal.Length), c(-1, 0, 1)) - expect_equal(as.numeric(out$Sepal.Width), c(0, 0, 0)) + expect_identical(as.numeric(out$Sepal.Length), c(-1, 0, 1), tolerance = 1e-3) + expect_identical(as.numeric(out$Sepal.Width), c(0, 0, 0), tolerance = 1e-3) - x <- insight::get_datagrid(iris, at = "Sepal.Length = c(-1, 0)") + x <- insight::get_datagrid(iris, by = "Sepal.Length = c(-1, 0)") out <- unstandardize(x, select = "Sepal.Length") - expect_equal(out$Sepal.Length[1:2], c(mean(iris$Sepal.Length) - sd(iris$Sepal.Length), mean(iris$Sepal.Length))) + expect_identical( + out$Sepal.Length[1:2], + c(mean(iris$Sepal.Length) - sd(iris$Sepal.Length), mean(iris$Sepal.Length)), + tolerance = 1e-3 + ) }) diff --git a/tests/testthat/test-text_format.R b/tests/testthat/test-text_format.R index 90dab36dd..5d4dfd4b1 100644 --- a/tests/testthat/test-text_format.R +++ b/tests/testthat/test-text_format.R @@ -1,5 +1,5 @@ test_that("text formatting helpers work as expected", { - expect_snapshot(format_text( + expect_snapshot(text_format( c( "A very long First", "Some similar long Second", @@ -10,7 +10,7 @@ test_that("text formatting helpers work as expected", { width = 20 )) - expect_snapshot(format_text( + expect_snapshot(text_format( c( "A very long First", "Some similar long Second", @@ -73,11 +73,11 @@ test_that("text formatting helpers work as expected", { test_that("text formatters respect `width` argument", { expect_snapshot({ long_text <- strrep("abc ", 100) - cat(format_text(long_text, width = 50)) - cat(format_text(long_text, width = 80)) + cat(text_format(long_text, width = 50)) + cat(text_format(long_text, width = 80)) withr::with_options(list(width = 50), code = { - cat(format_text(long_text)) + cat(text_format(long_text)) }) }) })