Skip to content

Commit

Permalink
Merge pull request #293 from easystats/plot_data_tabulate
Browse files Browse the repository at this point in the history
Plot method for `data_tabulate`
  • Loading branch information
bwiernik authored Aug 31, 2023
2 parents 5b80195 + 1079e2b commit cd304c4
Show file tree
Hide file tree
Showing 5 changed files with 217 additions and 0 deletions.
2 changes: 2 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@ S3method(data_plot,performance_pp_check)
S3method(data_plot,point_estimate)
S3method(data_plot,rope)
S3method(data_plot,see_compare_parameters)
S3method(plot,dw_data_tabulate)
S3method(plot,dw_data_tabulates)
S3method(plot,see_bayesfactor_models)
S3method(plot,see_bayesfactor_parameters)
S3method(plot,see_bayesfactor_savagedickey)
Expand Down
5 changes: 5 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,10 @@
# see (development version)

## New features

* There is now a `plot()` method for outputs of `datawizard::data_tabulate()`
(#293).

## Minor Changes

* The `print()` method for `performance::check_model()` now also evaluates the
Expand Down
137 changes: 137 additions & 0 deletions R/plot.dw_data_tabulate.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,137 @@
#' Plot tabulated data.
#'
#' @param x Object created by `datawizard::data_tabulate()`.
#' @param label_values Logical. Should values and percentages be displayed at the
#' top of each bar.
#' @param show_na Should missing values be dropped? Can be `"if_any"` (default) to show
#' the missing category only if any missing values are present, `"always"` to
#' always show the missing category, or `"never"` to never show the missing
#' category.
#' @param na_label The label given to missing values when they are shown.
#' @param error_bar Logical. Should error bars be displayed?
#' If `TRUE`, confidence intervals computed using the Wilson method are shown.
#' See Brown et al. (2001) for details.
#' @param ci Confidence Interval (CI) level. Default to `0.95` (⁠95%⁠).
#' @param fill_col Color to use for category columns (default: "#87CEFA").
#' @param color_error_bar Color to use for error bars (default: "#607B8B").
#' @param ... Unused
#'
#' @references
#' Brown, L. D., Cai, T. T., & DasGupta, A. (2001).
#' Interval estimation for a binomial proportion.
#' _Statistical Science, 16_(2), 101–133. \doi{10.1214/ss/1009213286}
#'
#' @rdname plot.dw_data_tabulate
#' @export

plot.dw_data_tabulates <- function(x, label_values = TRUE,
show_na = c("if_any", "always", "never"),
na_label = "(Missing)",
error_bar = TRUE, ci = .95,
fill_col = "#87CEFA",
color_error_bar = "#607B8B",
...) {
show_na <- match.arg(show_na, choices = c("if_any", "always", "never"))
if (length(x) == 1) {
plot.dw_data_tabulate(
x[[1]], label_values = label_values,
show_na = show_na, na_label = na_label,
error_bar = error_bar, ci = ci,
fill_col = fill_col, color_error_bar = color_error_bar
)
} else {
lapply(x, plot.dw_data_tabulate,
label_values = label_values,
show_na = show_na, na_label = na_label,
error_bar = error_bar, ci = ci,
fill_col = fill_col, color_error_bar = color_error_bar
)
}
}

#' @rdname plot.dw_data_tabulate
#'
#' @export

plot.dw_data_tabulate <- function(x, label_values = TRUE,
show_na = c("if_any", "always", "never"),
na_label = "(Missing)",
error_bar = TRUE, ci = .95,
fill_col = "#87CEFA",
color_error_bar = "#607B8B",
...) {
show_na <- match.arg(show_na, choices = c("if_any", "always", "never"))
dat <- as.data.frame(x)

if (show_na == "if_any") {
if (any(is.na(dat$Value))) {
show_na <- ifelse(dat[is.na(dat$Value), "N"] > 0, "always", "never")
} else {
show_na <- "never"
}
}

if (show_na == "never") {
dat <- dat[!is.na(dat$Value), ]
dat$output <- dat[[which(startsWith(names(dat), "Valid"))]]
} else {
dat$output <- dat[[which(startsWith(names(dat), "Raw"))]]

# deal with missing values
dat$Value <- as.character(dat$Value)
dat$Value[is.na(dat$Value)] <- na_label
dat$Value <- factor(
dat$Value,
levels = c(setdiff(dat$Value, na_label), na_label)
)
}

if (isTRUE(error_bar)) {
total_n <- sum(dat$N)
props <- dat$output / 100
dat <- cbind(dat, CI = ci, .wilson_ci(prop = props, total_n = total_n, ci = ci) * total_n)
dat$label <- paste0(dat$N, " (", round(dat$output, 2), "%)")
} else {
dat$label <- paste0(dat$N, "\n(", round(dat$output, 2), "%)")
}

out <- ggplot2::ggplot(dat) +
ggplot2::aes(x = .data$Value, y = .data$N) +
ggplot2::geom_col(fill = fill_col) +
ggplot2::labs(title = unique(dat$Variable)) +
theme_modern()

if (isTRUE(label_values)) {
if (isTRUE(error_bar)) {
out <- out +
ggplot2::geom_text(ggplot2::aes(label = .data$label), vjust = -1, hjust = 1.2) +
ggplot2::coord_cartesian(ylim = c(0, max(dat$CI_high)))
} else {
out <- out +
ggplot2::geom_text(ggplot2::aes(label = .data$label), vjust = -0.5) +
ggplot2::coord_cartesian(ylim = c(0, max(dat$N) * 1.2))
}
}

# add confidence intervals for frequencies
if (isTRUE(error_bar)) {
out <- out +
ggplot2::geom_linerange(
ggplot2::aes(ymin = .data$CI_low, ymax = .data$CI_high),
color = color_error_bar
)
}

out
}

.wilson_ci <- function(prop, total_n, ci = .95) {
z <- stats::qnorm((1 - ci) / 2, lower.tail = FALSE)
z2 <- z^2
p1 <- prop + 0.5 * z2 / total_n
p2 <- z * sqrt((prop * (1 - prop) + 0.25 * z2 / total_n) / total_n)
p3 <- 1 + z2 / total_n
CI_low <- (p1 - p2) / p3
CI_high <- (p1 + p2) / p3
return(data.frame(CI_low = CI_low, CI_high = CI_high))
}
64 changes: 64 additions & 0 deletions man/plot.dw_data_tabulate.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

9 changes: 9 additions & 0 deletions tests/testthat/test-plot.dw_data_tabulate.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
test_that("`plot.dw_data_tabulate()` works with single table", {
x <- datawizard::data_tabulate(mtcars, select = "cyl")
expect_s3_class(plot(x), "gg")
})

test_that("`plot.dw_data_tabulate()` works with multiple tables", {
x <- datawizard::data_tabulate(mtcars, select = c("cyl", "carb", "am"))
expect_true(is.list(plot(x)))
})

0 comments on commit cd304c4

Please sign in to comment.