-
-
Notifications
You must be signed in to change notification settings - Fork 16
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* draft grouped mean * add methods * fix * fix * docs * fix printing issues * fix check issues * fix * news, desc * fix issues * add p-value * emmeans to suggests * add ci * fix, tests * snapshots * docs * add tests * docs * round weighted N * fix for weights * skip test if emmeans not installed * typo in news * fix typos in docs --------- Co-authored-by: Etienne Bacher <[email protected]>
- Loading branch information
1 parent
6e3182c
commit 57b193d
Showing
10 changed files
with
678 additions
and
18 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,7 +1,7 @@ | ||
Type: Package | ||
Package: datawizard | ||
Title: Easy Data Wrangling and Statistical Transformations | ||
Version: 0.8.0.5 | ||
Version: 0.8.0.6 | ||
Authors@R: c( | ||
person("Indrajeet", "Patil", , "[email protected]", role = "aut", | ||
comment = c(ORCID = "0000-0003-1995-6531", Twitter = "@patilindrajeets")), | ||
|
@@ -43,6 +43,7 @@ Suggests: | |
data.table, | ||
dplyr (>= 1.0), | ||
effectsize, | ||
emmeans, | ||
gamm4, | ||
ggplot2, | ||
gt, | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,294 @@ | ||
#' @title Summary of mean values by group | ||
#' @name means_by_group | ||
#' | ||
#' @description Computes summary table of means by groups. | ||
#' | ||
#' @param x A vector or a data frame. | ||
#' @param group If `x` is a numeric vector, `group` should be a factor that | ||
#' indicates the group-classifying categories. If `x` is a data frame, `group` | ||
#' should be a character string, naming the variable in `x` that is used for | ||
#' grouping. Numeric vectors are coerced to factors. Not that `group` should | ||
#' only refer to a single variable. | ||
#' @param ci Level of confidence interval for mean estimates. Default is `0.95`. | ||
#' Use `ci = NA` to suppress confidence intervals. | ||
#' @param weights If `x` is a numeric vector, `weights` should be a vector of | ||
#' weights that will be applied to weight all observations. If `x` is a data | ||
#' frame, `weights` can also be a character string indicating the name of the | ||
#' variable in `x` that should be used for weighting. Default is `NULL`, so no | ||
#' weights are used. | ||
#' @param digits Optional scalar, indicating the amount of digits after decimal | ||
#' point when rounding estimates and values. | ||
#' @param ... Currently not used | ||
#' @inheritParams find_columns | ||
#' | ||
#' @return A data frame with information on mean and further summary statistics | ||
#' for each sub-group. | ||
#' | ||
#' @details This function is comparable to `aggregate(x, group, mean)`, but provides | ||
#' some further information, including summary statistics from a One-Way-ANOVA | ||
#' using `x` as dependent and `group` as independent variable. [`emmeans::contrast()`] | ||
#' is used to get p-values for each sub-group. P-values indicate whether each | ||
#' group-mean is significantly different from the total mean. | ||
#' | ||
#' @examples | ||
#' data(efc) | ||
#' means_by_group(efc, "c12hour", "e42dep") | ||
#' | ||
#' data(iris) | ||
#' means_by_group(iris, "Sepal.Width", "Species") | ||
#' | ||
#' # weighting | ||
#' efc$weight <- abs(rnorm(n = nrow(efc), mean = 1, sd = .5)) | ||
#' means_by_group(efc, "c12hour", "e42dep", weights = "weight") | ||
#' @export | ||
means_by_group <- function(x, ...) { | ||
UseMethod("means_by_group") | ||
} | ||
|
||
|
||
#' @export | ||
means_by_group.default <- function(x, ...) { | ||
insight::format_error("`means_by_group()` does not work for objects of class `", class(x)[1], "`.") | ||
} | ||
|
||
|
||
#' @rdname means_by_group | ||
#' @export | ||
means_by_group.numeric <- function(x, | ||
group = NULL, | ||
ci = 0.95, | ||
weights = NULL, | ||
digits = NULL, | ||
...) { | ||
# sanity check for arguments | ||
|
||
# "group" must be provided | ||
if (is.null(group)) { | ||
insight::format_error("Argument `group` is missing.") | ||
} | ||
|
||
# group must be of same length as x | ||
if (length(group) != length(x)) { | ||
insight::format_error("Argument `group` must be of same length as `x`.") | ||
} | ||
|
||
# if weights are provided, must be of same length as x | ||
if (!is.null(weights) && length(weights) != length(x)) { | ||
insight::format_error("Argument `weights` must be of same length as `x`.") | ||
} | ||
|
||
# if weights are NULL, set weights to 1 | ||
if (is.null(weights)) weights <- rep(1, length(x)) | ||
|
||
# retrieve labels | ||
var_mean_label <- attr(x, "label", exact = TRUE) | ||
var_grp_label <- attr(group, "label", exact = TRUE) | ||
|
||
# if no labels present, use variable names directly | ||
if (is.null(var_mean_label)) { | ||
var_mean_label <- deparse(substitute(x)) | ||
} | ||
if (is.null(var_grp_label)) { | ||
var_grp_label <- deparse(substitute(group)) | ||
} | ||
|
||
# coerce group to factor if numeric, or convert labels to levels, if factor | ||
if (is.factor(group)) { | ||
group <- tryCatch(labels_to_levels(group, verbose = FALSE), error = function(e) group) | ||
} else { | ||
group <- to_factor(group) | ||
} | ||
|
||
data <- stats::na.omit(data.frame( | ||
x = x, | ||
group = group, | ||
weights = weights, | ||
stringsAsFactors = FALSE | ||
)) | ||
|
||
# get grouped means table | ||
out <- .means_by_group(data, ci = ci) | ||
|
||
# attributes | ||
attr(out, "var_mean_label") <- var_mean_label | ||
attr(out, "var_grp_label") <- var_grp_label | ||
attr(out, "digits") <- digits | ||
|
||
class(out) <- c("dw_groupmeans", "data.frame") | ||
out | ||
} | ||
|
||
|
||
#' @rdname means_by_group | ||
#' @export | ||
means_by_group.data.frame <- function(x, | ||
select = NULL, | ||
group = NULL, | ||
ci = 0.95, | ||
weights = NULL, | ||
digits = NULL, | ||
exclude = NULL, | ||
ignore_case = FALSE, | ||
regex = FALSE, | ||
verbose = TRUE, | ||
...) { | ||
# evaluate select/exclude, may be select-helpers | ||
select <- .select_nse(select, | ||
x, | ||
exclude, | ||
ignore_case, | ||
regex = regex, | ||
verbose = verbose | ||
) | ||
|
||
if (is.null(weights)) { | ||
w <- NULL | ||
} else if (is.character(weights)) { | ||
w <- x[[weights]] | ||
} else { | ||
w <- weights | ||
} | ||
|
||
out <- lapply(select, function(i) { | ||
# if no labels present, use variable names directy | ||
if (is.null(attr(x[[i]], "label", exact = TRUE))) { | ||
attr(x[[i]], "label") <- i | ||
} | ||
if (is.null(attr(x[[group]], "label", exact = TRUE))) { | ||
attr(x[[group]], "label") <- group | ||
} | ||
# compute means table | ||
means_by_group(x[[i]], group = x[[group]], ci = ci, weights = w, digits = digits, ...) | ||
}) | ||
|
||
class(out) <- c("dw_groupmeans_list", "list") | ||
out | ||
} | ||
|
||
|
||
#' @keywords internal | ||
.means_by_group <- function(data, ci = 0.95) { | ||
# compute anova statistics for mean table | ||
if (is.null(data$weights) || all(data$weights == 1)) { | ||
fit <- stats::lm(x ~ group, data = data) | ||
} else { | ||
fit <- stats::lm(x ~ group, weights = data$weights, data = data) | ||
} | ||
|
||
# summary table data | ||
groups <- split(data$x, data$group) | ||
group_weights <- split(data$weights, data$group) | ||
out <- do.call(rbind, Map(function(x, w) { | ||
data.frame( | ||
Mean = weighted_mean(x, weights = w), | ||
SD = weighted_sd(x, weights = w), | ||
N = round(sum(w)), | ||
stringsAsFactors = FALSE | ||
) | ||
}, groups, group_weights)) | ||
|
||
# add group names | ||
out$Category <- levels(data$group) | ||
out$p <- out$CI_high <- out$CI_low <- NA | ||
|
||
# p-values of contrast-means | ||
if (insight::check_if_installed("emmeans", quietly = TRUE)) { | ||
# create summary table of contrasts, for p-values and confidence intervals | ||
predicted <- emmeans::emmeans(fit, specs = "group", level = ci) | ||
contrasts <- emmeans::contrast(predicted, method = "eff") | ||
# add p-values and confidence intervals to "out" | ||
if (!is.null(ci) && !is.na(ci)) { | ||
summary_table <- as.data.frame(predicted) | ||
out$CI_low <- summary_table$lower.CL | ||
out$CI_high <- summary_table$upper.CL | ||
} | ||
summary_table <- as.data.frame(contrasts) | ||
out$p <- summary_table$p.value | ||
} | ||
|
||
# reorder columns | ||
out <- out[c("Category", "Mean", "N", "SD", "CI_low", "CI_high", "p")] | ||
|
||
# finally, add total-row | ||
out <- rbind( | ||
out, | ||
data.frame( | ||
Category = "Total", | ||
Mean = weighted_mean(data$x, weights = data$weights), | ||
N = nrow(data), | ||
SD = weighted_sd(data$x, weights = data$weights), | ||
CI_low = NA, | ||
CI_high = NA, | ||
p = NA, | ||
stringsAsFactors = FALSE | ||
) | ||
) | ||
|
||
# get anova statistics for mean table | ||
sum.fit <- summary(fit) | ||
|
||
# r-squared values | ||
r2 <- sum.fit$r.squared | ||
r2.adj <- sum.fit$adj.r.squared | ||
|
||
# F-statistics | ||
fstat <- sum.fit$fstatistic | ||
pval <- stats::pf(fstat[1], fstat[2], fstat[3], lower.tail = FALSE) | ||
|
||
# copy as attributes | ||
attr(out, "r2") <- r2 | ||
attr(out, "ci") <- ci | ||
attr(out, "adj.r2") <- r2.adj | ||
attr(out, "fstat") <- fstat[1] | ||
attr(out, "p.value") <- pval | ||
|
||
out | ||
} | ||
|
||
|
||
# methods ----------------- | ||
|
||
#' @export | ||
format.dw_groupmeans <- function(x, digits = NULL, ...) { | ||
if (is.null(digits)) { | ||
digits <- attr(x, "digits", exact = TRUE) | ||
} | ||
if (is.null(digits)) { | ||
digits <- 2 | ||
} | ||
x$N <- insight::format_value(x$N, digits = 0) | ||
insight::format_table(remove_empty_columns(x), digits = digits, ...) | ||
} | ||
|
||
#' @export | ||
print.dw_groupmeans <- function(x, digits = NULL, ...) { | ||
out <- format(x, digits = digits, ...) | ||
|
||
# caption | ||
l1 <- attributes(x)$var_mean_label | ||
l2 <- attributes(x)$var_grp_label | ||
if (!is.null(l1) && !is.null(l2)) { | ||
caption <- c(paste0("# Mean of ", l1, " by ", l2), "blue") | ||
} else { | ||
caption <- NULL | ||
} | ||
|
||
# footer | ||
footer <- paste0( | ||
"\nAnova: R2=", insight::format_value(attributes(x)$r2, digits = 3), | ||
"; adj.R2=", insight::format_value(attributes(x)$adj.r2, digits = 3), | ||
"; F=", insight::format_value(attributes(x)$fstat, digits = 3), | ||
"; ", insight::format_p(attributes(x)$p.value, whitespace = FALSE), | ||
"\n" | ||
) | ||
|
||
cat(insight::export_table(out, caption = caption, footer = footer, ...)) | ||
} | ||
|
||
#' @export | ||
print.dw_groupmeans_list <- function(x, digits = NULL, ...) { | ||
for (i in seq_along(x)) { | ||
if (i > 1) cat("\n") | ||
print(x[[i]], digits = digits, ...) | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.