From ebe48b4bd1a2cbf9f5f5a7cfcb6ba705f8750235 Mon Sep 17 00:00:00 2001 From: Daniel Date: Sun, 23 Jun 2024 17:54:07 +0200 Subject: [PATCH] Unexpected missing values in `data_tabulate()` (#518) * Unexpected missing values in `data_tabulate()` Fixes #514 * reverse usage * fix tests (FALSE -> TRUE) * docs, news * Update NEWS.md Co-authored-by: Indrajeet Patil * add comment --------- Co-authored-by: Indrajeet Patil --- DESCRIPTION | 2 +- NEWS.md | 5 +++ R/data_summary.R | 22 +++++----- R/data_tabulate.R | 56 ++++++++++++++------------ R/data_xtabulate.R | 30 ++++++++------ man/data_summary.Rd | 8 ++-- man/data_tabulate.Rd | 12 +++--- tests/testthat/_snaps/data_tabulate.md | 16 ++++---- tests/testthat/test-data_summary.R | 2 +- tests/testthat/test-data_tabulate.R | 30 +++++++------- 10 files changed, 98 insertions(+), 85 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 0a7fb9594..c56e3f9dd 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Type: Package Package: datawizard Title: Easy Data Wrangling and Statistical Transformations -Version: 0.11.0.3 +Version: 0.11.0.4 Authors@R: c( person("Indrajeet", "Patil", , "patilindrajeet.science@gmail.com", role = "aut", comment = c(ORCID = "0000-0003-1995-6531", Twitter = "@patilindrajeets")), diff --git a/NEWS.md b/NEWS.md index ada1f08ea..0954f1214 100644 --- a/NEWS.md +++ b/NEWS.md @@ -2,6 +2,11 @@ BREAKING CHANGES +* The argument `include_na` in `data_tabulate()` and `data_summary()` has been + renamed to `remove_na`. Consequently, to mimic former behaviour, `FALSE` and + `TRUE` need to be switched (i.e. `remove_na = TRUE` is equivalent to the former + `include_na = FALSE`). + * Class names for objects returned by `data_tabulate()` have been changed to `datawizard_table` and `datawizard_crosstable` (resp. the plural forms, `*_tables`), to provide a clearer and more consistent naming scheme. 
diff --git a/R/data_summary.R b/R/data_summary.R index 8d15f8483..7662d0c94 100644 --- a/R/data_summary.R +++ b/R/data_summary.R @@ -8,9 +8,9 @@ #' @param by Optional character string, indicating the name of a variable in `x`. #' If supplied, the data will be split by this variable and summary statistics #' will be computed for each group. -#' @param include_na Logical. If `TRUE`, missing values are included as a level -#' in the grouping variable. If `FALSE`, missing values are omitted from the -#' grouping variable. +#' @param remove_na Logical. If `TRUE`, missing values are omitted from the +#' grouping variable. If `FALSE` (default), missing values are included as a +#' level in the grouping variable. #' @param ... One or more named expressions that define the new variable name #' and the function to compute the summary statistic. Example: #' `mean_sepal_width = mean(Sepal.Width)`. The expression can also be provided @@ -57,8 +57,8 @@ data_summary <- function(x, ...) { #' @export -data_summary.matrix <- function(x, ..., by = NULL, include_na = TRUE) { - data_summary(as.data.frame(x), ..., by = by, include_na = include_na) +data_summary.matrix <- function(x, ..., by = NULL, remove_na = FALSE) { + data_summary(as.data.frame(x), ..., by = by, remove_na = remove_na) } @@ -70,7 +70,7 @@ data_summary.default <- function(x, ...) { #' @rdname data_summary #' @export -data_summary.data.frame <- function(x, ..., by = NULL, include_na = TRUE) { +data_summary.data.frame <- function(x, ..., by = NULL, remove_na = FALSE) { dots <- eval(substitute(alist(...))) # do we have any expression at all? 
@@ -103,10 +103,10 @@ data_summary.data.frame <- function(x, ..., by = NULL, include_na = TRUE) { } # split data, add NA levels, if requested l <- lapply(x[by], function(i) { - if (include_na && anyNA(i)) { - addNA(i) - } else { + if (remove_na || !anyNA(i)) { i + } else { + addNA(i) } }) split_data <- split(x, l, drop = TRUE) @@ -137,7 +137,7 @@ data_summary.data.frame <- function(x, ..., by = NULL, include_na = TRUE) { #' @export -data_summary.grouped_df <- function(x, ..., by = NULL, include_na = TRUE) { +data_summary.grouped_df <- function(x, ..., by = NULL, remove_na = FALSE) { # extract group variables grps <- attr(x, "groups", exact = TRUE) group_variables <- data_remove(grps, ".rows") @@ -148,7 +148,7 @@ data_summary.grouped_df <- function(x, ..., by = NULL, include_na = TRUE) { # remove information specific to grouped df's attr(x, "groups") <- NULL class(x) <- "data.frame" - data_summary(x, ..., by = by, include_na = include_na) + data_summary(x, ..., by = by, remove_na = remove_na) } diff --git a/R/data_tabulate.R b/R/data_tabulate.R index 74f4f2e03..e94fc5d55 100644 --- a/R/data_tabulate.R +++ b/R/data_tabulate.R @@ -15,7 +15,7 @@ #' factor levels are dropped from the frequency table. #' @param name Optional character string, which includes the name that is used #' for printing. -#' @param include_na Logical, if `TRUE`, missing values are included in the +#' @param remove_na Logical, if `FALSE`, missing values are included in the #' frequency or crosstable, else missing values are omitted. #' @param collapse Logical, if `TRUE` collapses multiple tables into one larger #' table for printing. This affects only printing, not the returned object. @@ -40,7 +40,7 @@ #' (missing) values by default. The first column indicates values of `x`, the #' first row indicates values of `by` (including missing values). The last row #' and column contain the total frequencies for each row and column, respectively. 
-#' Setting `include_na = FALSE` will omit missing values from the crosstable. +#' Setting `remove_na = TRUE` will omit missing values from the crosstable. #' Setting `proportions` to `"row"` or `"column"` will add row or column #' percentages. Setting `proportions` to `"full"` will add relative frequencies #' for the full table. @@ -62,7 +62,7 @@ #' data_tabulate(efc$c172code) #' #' # drop missing values -#' data_tabulate(efc$c172code, include_na = FALSE) +#' data_tabulate(efc$c172code, remove_na = TRUE) #' #' # data frame #' data_tabulate(efc, c("e42dep", "c172code")) @@ -109,7 +109,7 @@ #' efc$c172code, #' by = efc$e16sex, #' proportions = "column", -#' include_na = FALSE +#' remove_na = TRUE #' ) #' #' # round percentages @@ -133,7 +133,7 @@ data_tabulate.default <- function(x, by = NULL, drop_levels = FALSE, weights = NULL, - include_na = TRUE, + remove_na = FALSE, proportions = NULL, name = NULL, verbose = TRUE, @@ -163,7 +163,7 @@ data_tabulate.default <- function(x, x, by = by, weights = weights, - include_na = include_na, + remove_na = remove_na, proportions = proportions, obj_name = obj_name, group_variable = group_variable @@ -172,30 +172,34 @@ data_tabulate.default <- function(x, # frequency table if (is.null(weights)) { - if (include_na) { - freq_table <- tryCatch(table(addNA(x)), error = function(e) NULL) - } else { + if (remove_na) { + # we have a `.default` and a `.data.frame` method for `data_tabulate()`. + # since this is the default, `x` can be an object which cannot be used + # with `table()`, that's why we add `tryCatch()` here. Below we give an + # informative error message for non-supported objects. 
freq_table <- tryCatch(table(x), error = function(e) NULL) + } else { + freq_table <- tryCatch(table(addNA(x)), error = function(e) NULL) } - } else if (include_na) { - # weighted frequency table, including NA + } else if (remove_na) { + # weighted frequency table, excluding NA freq_table <- tryCatch( stats::xtabs( weights ~ x, - data = data.frame(weights = weights, x = addNA(x)), - na.action = stats::na.pass, - addNA = TRUE + data = data.frame(weights = weights, x = x), + na.action = stats::na.omit, + addNA = FALSE ), error = function(e) NULL ) } else { - # weighted frequency table, excluding NA + # weighted frequency table, including NA freq_table <- tryCatch( stats::xtabs( weights ~ x, - data = data.frame(weights = weights, x = x), - na.action = stats::na.omit, - addNA = FALSE + data = data.frame(weights = weights, x = addNA(x)), + na.action = stats::na.pass, + addNA = TRUE ), error = function(e) NULL ) @@ -218,12 +222,12 @@ data_tabulate.default <- function(x, out$`Raw %` <- 100 * out$N / sum(out$N) # if we have missing values, we add a row with NA - if (include_na) { - out$`Valid %` <- c(100 * out$N[-nrow(out)] / sum(out$N[-nrow(out)]), NA) - valid_n <- sum(out$N[-length(out$N)], na.rm = TRUE) - } else { + if (remove_na) { out$`Valid %` <- 100 * out$N / sum(out$N) valid_n <- sum(out$N, na.rm = TRUE) + } else { + out$`Valid %` <- c(100 * out$N[-nrow(out)] / sum(out$N[-nrow(out)]), NA) + valid_n <- sum(out$N[-length(out$N)], na.rm = TRUE) } out$`Cumulative %` <- cumsum(out$`Valid %`) @@ -271,7 +275,7 @@ data_tabulate.data.frame <- function(x, by = NULL, drop_levels = FALSE, weights = NULL, - include_na = TRUE, + remove_na = FALSE, proportions = NULL, collapse = FALSE, verbose = TRUE, @@ -297,7 +301,7 @@ data_tabulate.data.frame <- function(x, proportions = proportions, drop_levels = drop_levels, weights = weights, - include_na = include_na, + remove_na = remove_na, name = i, verbose = verbose, ... 
@@ -326,7 +330,7 @@ data_tabulate.grouped_df <- function(x, proportions = NULL, drop_levels = FALSE, weights = NULL, - include_na = TRUE, + remove_na = FALSE, collapse = FALSE, verbose = TRUE, ...) { @@ -362,7 +366,7 @@ data_tabulate.grouped_df <- function(x, verbose = verbose, drop_levels = drop_levels, weights = weights, - include_na = include_na, + remove_na = remove_na, by = by, proportions = proportions, group_variable = group_variable, diff --git a/R/data_xtabulate.R b/R/data_xtabulate.R index 5c387ff95..08be1eeca 100644 --- a/R/data_xtabulate.R +++ b/R/data_xtabulate.R @@ -3,7 +3,7 @@ .crosstable <- function(x, by, weights = NULL, - include_na = TRUE, + remove_na = FALSE, proportions = NULL, obj_name = NULL, group_variable = NULL) { @@ -12,30 +12,34 @@ } # frequency table if (is.null(weights)) { - if (include_na) { - x_table <- tryCatch(table(addNA(x), addNA(by)), error = function(e) NULL) - } else { + # we have a `.default` and a `.data.frame` method for `data_tabulate()`. + # since this is the default, `x` can be an object which cannot be used + # with `table()`, that's why we add `tryCatch()` here. Below we give an + # informative error message for non-supported objects. 
+ if (remove_na) { x_table <- tryCatch(table(x, by), error = function(e) NULL) + } else { + x_table <- tryCatch(table(addNA(x), addNA(by)), error = function(e) NULL) } - } else if (include_na) { - # weighted frequency table, including NA + } else if (remove_na) { + # weighted frequency table, excluding NA x_table <- tryCatch( stats::xtabs( weights ~ x + by, - data = data.frame(weights = weights, x = addNA(x), by = addNA(by)), - na.action = stats::na.pass, - addNA = TRUE + data = data.frame(weights = weights, x = x, by = by), + na.action = stats::na.omit, + addNA = FALSE ), error = function(e) NULL ) } else { - # weighted frequency table, excluding NA + # weighted frequency table, including NA x_table <- tryCatch( stats::xtabs( weights ~ x + by, - data = data.frame(weights = weights, x = x, by = by), - na.action = stats::na.omit, - addNA = FALSE + data = data.frame(weights = weights, x = addNA(x), by = addNA(by)), + na.action = stats::na.pass, + addNA = TRUE ), error = function(e) NULL ) diff --git a/man/data_summary.Rd b/man/data_summary.Rd index ccbf4c524..24cfa1a9f 100644 --- a/man/data_summary.Rd +++ b/man/data_summary.Rd @@ -7,7 +7,7 @@ \usage{ data_summary(x, ...) -\method{data_summary}{data.frame}(x, ..., by = NULL, include_na = TRUE) +\method{data_summary}{data.frame}(x, ..., by = NULL, remove_na = FALSE) } \arguments{ \item{x}{A (grouped) data frame.} @@ -22,9 +22,9 @@ summary function \code{n()} can be used to count the number of observations.} If supplied, the data will be split by this variable and summary statistics will be computed for each group.} -\item{include_na}{Logical. If \code{TRUE}, missing values are included as a level -in the grouping variable. If \code{FALSE}, missing values are omitted from the -grouping variable.} +\item{remove_na}{Logical. If \code{TRUE}, missing values are omitted from the +grouping variable. 
If \code{FALSE} (default), missing values are included as a +level in the grouping variable.} } \value{ A data frame with the requested summary statistics. diff --git a/man/data_tabulate.Rd b/man/data_tabulate.Rd index 3f17bb21c..2feadf3a9 100644 --- a/man/data_tabulate.Rd +++ b/man/data_tabulate.Rd @@ -14,7 +14,7 @@ data_tabulate(x, ...) by = NULL, drop_levels = FALSE, weights = NULL, - include_na = TRUE, + remove_na = FALSE, proportions = NULL, name = NULL, verbose = TRUE, @@ -30,7 +30,7 @@ data_tabulate(x, ...) by = NULL, drop_levels = FALSE, weights = NULL, - include_na = TRUE, + remove_na = FALSE, proportions = NULL, collapse = FALSE, verbose = TRUE, @@ -62,7 +62,7 @@ factor levels are dropped from the frequency table.} \item{weights}{Optional numeric vector of weights. Must be of the same length as \code{x}. If \code{weights} is supplied, weighted frequencies are calculated.} -\item{include_na}{Logical, if \code{TRUE}, missing values are included in the +\item{remove_na}{Logical, if \code{FALSE}, missing values are included in the frequency or crosstable, else missing values are omitted.} \item{proportions}{Optional character string, indicating the type of @@ -173,7 +173,7 @@ If \code{by} is supplied, a crosstable is created. The crosstable includes \verb (missing) values by default. The first column indicates values of \code{x}, the first row indicates values of \code{by} (including missing values). The last row and column contain the total frequencies for each row and column, respectively. -Setting \code{include_na = FALSE} will omit missing values from the crosstable. +Setting \code{remove_na = TRUE} will omit missing values from the crosstable. Setting \code{proportions} to \code{"row"} or \code{"column"} will add row or column percentages. Setting \code{proportions} to \code{"full"} will add relative frequencies for the full table. 
@@ -189,7 +189,7 @@ data(efc) data_tabulate(efc$c172code) # drop missing values -data_tabulate(efc$c172code, include_na = FALSE) +data_tabulate(efc$c172code, remove_na = TRUE) # data frame data_tabulate(efc, c("e42dep", "c172code")) @@ -236,7 +236,7 @@ data_tabulate( efc$c172code, by = efc$e16sex, proportions = "column", - include_na = FALSE + remove_na = TRUE ) # round percentages diff --git a/tests/testthat/_snaps/data_tabulate.md b/tests/testthat/_snaps/data_tabulate.md index 59a20dc01..ffde63088 100644 --- a/tests/testthat/_snaps/data_tabulate.md +++ b/tests/testthat/_snaps/data_tabulate.md @@ -259,7 +259,7 @@ Code print(data_tabulate(efc$c172code, by = efc$e16sex, proportions = "full", - include_na = FALSE)) + remove_na = TRUE)) Output efc$c172code | male | female | Total -------------+------------+------------+------ @@ -288,7 +288,7 @@ Code print(data_tabulate(efc$c172code, by = efc$e16sex, proportions = "full", - include_na = FALSE, weights = efc$weights)) + remove_na = TRUE, weights = efc$weights)) Output efc$c172code | male | female | Total -------------+------------+------------+------ @@ -317,7 +317,7 @@ Code print(data_tabulate(efc, "c172code", by = efc$e16sex, proportions = "row", - include_na = FALSE)) + remove_na = TRUE)) Output c172code | male | female | Total ---------+------------+------------+------ @@ -348,7 +348,7 @@ Code print(data_tabulate(efc, "c172code", by = efc$e16sex, proportions = "row", - include_na = FALSE, weights = efc$weights)) + remove_na = TRUE, weights = efc$weights)) Output c172code | male | female | Total ---------+------------+------------+------ @@ -378,7 +378,7 @@ Code print(data_tabulate(efc, "c172code", by = "e16sex", proportions = "column", - include_na = FALSE)) + remove_na = TRUE)) Output c172code | male | female | Total ---------+------------+------------+------ @@ -409,7 +409,7 @@ Code print(data_tabulate(efc, "c172code", by = "e16sex", proportions = "column", - include_na = FALSE, weights = "weights")) + remove_na 
= TRUE, weights = "weights")) Output c172code | male | female | Total ---------+------------+------------+------ @@ -497,7 +497,7 @@ Code print_md(data_tabulate(efc$c172code, by = efc$e16sex, proportions = "full", - include_na = FALSE)) + remove_na = TRUE)) Output [1] "|efc$c172code | male| female| Total|" [2] "|:------------|----------:|----------:|-----:|" @@ -534,7 +534,7 @@ Code print_md(data_tabulate(efc$c172code, by = efc$e16sex, proportions = "full", - include_na = FALSE, weights = efc$weights)) + remove_na = TRUE, weights = efc$weights)) Output [1] "|efc$c172code | male| female| Total|" [2] "|:------------|----------:|----------:|-----:|" diff --git a/tests/testthat/test-data_summary.R b/tests/testthat/test-data_summary.R index 746d4c51a..c60b142d2 100644 --- a/tests/testthat/test-data_summary.R +++ b/tests/testthat/test-data_summary.R @@ -175,7 +175,7 @@ test_that("data_summary, with NA", { data(efc, package = "datawizard") out <- data_summary(efc, MW = mean(c12hour, na.rm = TRUE), by = "c172code") expect_snapshot(print(out)) - out <- data_summary(efc, MW = mean(c12hour, na.rm = TRUE), by = "c172code", include_na = FALSE) + out <- data_summary(efc, MW = mean(c12hour, na.rm = TRUE), by = "c172code", remove_na = TRUE) expect_snapshot(print(out)) # sorting for multiple groups out <- data_summary(efc, MW = mean(c12hour, na.rm = TRUE), by = c("e42dep", "c172code")) diff --git a/tests/testthat/test-data_tabulate.R b/tests/testthat/test-data_tabulate.R index 5108e29c2..9848d42b9 100644 --- a/tests/testthat/test-data_tabulate.R +++ b/tests/testthat/test-data_tabulate.R @@ -287,11 +287,11 @@ test_that("data_tabulate exclude/include missing values", { efc$e16sex[sample.int(nrow(efc), 5)] <- NA out <- data_tabulate(efc$c172code) expect_identical(out$N, c(8L, 66L, 16L, 10L)) - out <- data_tabulate(efc$c172code, include_na = FALSE) + out <- data_tabulate(efc$c172code, remove_na = TRUE) expect_identical(out$N, c(8L, 66L, 16L)) out <- data_tabulate(efc$c172code, weights 
= efc$weights) expect_identical(out$N, c(10, 67, 15, 13)) - out <- data_tabulate(efc$c172code, include_na = FALSE, weights = efc$weights) + out <- data_tabulate(efc$c172code, remove_na = TRUE, weights = efc$weights) expect_identical(out$N, c(10, 67, 15)) }) @@ -305,17 +305,17 @@ test_that("data_tabulate, cross tables", { efc$e16sex[sample.int(nrow(efc), 5)] <- NA expect_snapshot(print(data_tabulate(efc$c172code, by = efc$e16sex, proportions = "full"))) - expect_snapshot(print(data_tabulate(efc$c172code, by = efc$e16sex, proportions = "full", include_na = FALSE))) + expect_snapshot(print(data_tabulate(efc$c172code, by = efc$e16sex, proportions = "full", remove_na = TRUE))) expect_snapshot(print(data_tabulate(efc$c172code, by = efc$e16sex, proportions = "full", weights = efc$weights))) - expect_snapshot(print(data_tabulate(efc$c172code, by = efc$e16sex, proportions = "full", include_na = FALSE, weights = efc$weights))) # nolint + expect_snapshot(print(data_tabulate(efc$c172code, by = efc$e16sex, proportions = "full", remove_na = TRUE, weights = efc$weights))) # nolint expect_snapshot(print(data_tabulate(efc, "c172code", by = efc$e16sex, proportions = "row"))) - expect_snapshot(print(data_tabulate(efc, "c172code", by = efc$e16sex, proportions = "row", include_na = FALSE))) + expect_snapshot(print(data_tabulate(efc, "c172code", by = efc$e16sex, proportions = "row", remove_na = TRUE))) expect_snapshot(print(data_tabulate(efc, "c172code", by = efc$e16sex, proportions = "row", weights = efc$weights))) - expect_snapshot(print(data_tabulate(efc, "c172code", by = efc$e16sex, proportions = "row", include_na = FALSE, weights = efc$weights))) # nolint + expect_snapshot(print(data_tabulate(efc, "c172code", by = efc$e16sex, proportions = "row", remove_na = TRUE, weights = efc$weights))) # nolint expect_snapshot(print(data_tabulate(efc, "c172code", by = "e16sex", proportions = "column"))) - expect_snapshot(print(data_tabulate(efc, "c172code", by = "e16sex", proportions = "column", 
include_na = FALSE))) + expect_snapshot(print(data_tabulate(efc, "c172code", by = "e16sex", proportions = "column", remove_na = TRUE))) expect_snapshot(print(data_tabulate(efc, "c172code", by = "e16sex", proportions = "column", weights = "weights"))) - expect_snapshot(print(data_tabulate(efc, "c172code", by = "e16sex", proportions = "column", include_na = FALSE, weights = "weights"))) # nolint + expect_snapshot(print(data_tabulate(efc, "c172code", by = "e16sex", proportions = "column", remove_na = TRUE, weights = "weights"))) # nolint }) test_that("data_tabulate, cross tables, HTML", { @@ -326,11 +326,11 @@ test_that("data_tabulate, cross tables, HTML", { efc$e16sex[sample.int(nrow(efc), 5)] <- NA expect_s3_class(print_html(data_tabulate(efc$c172code, by = efc$e16sex, proportions = "full")), "gt_tbl") - expect_s3_class(print_html(data_tabulate(efc$c172code, by = efc$e16sex, proportions = "full", include_na = FALSE)), "gt_tbl") # nolint + expect_s3_class(print_html(data_tabulate(efc$c172code, by = efc$e16sex, proportions = "full", remove_na = TRUE)), "gt_tbl") # nolint expect_s3_class(print_html(data_tabulate(efc$c172code, by = efc$e16sex, proportions = "full", weights = efc$weights)), "gt_tbl") # nolint - expect_s3_class(print_html(data_tabulate(efc$c172code, by = efc$e16sex, proportions = "full", include_na = FALSE, weights = efc$weights)), "gt_tbl") # nolint + expect_s3_class(print_html(data_tabulate(efc$c172code, by = efc$e16sex, proportions = "full", remove_na = TRUE, weights = efc$weights)), "gt_tbl") # nolint expect_s3_class(print_html(data_tabulate(efc, "c172code", by = efc$e16sex, proportions = "row")), "gt_tbl") - expect_s3_class(print_html(data_tabulate(efc, "c172code", by = efc$e16sex, proportions = "row", include_na = FALSE, weights = efc$weights)), "gt_tbl") # nolint + expect_s3_class(print_html(data_tabulate(efc, "c172code", by = efc$e16sex, proportions = "row", remove_na = TRUE, weights = efc$weights)), "gt_tbl") # nolint }) test_that("data_tabulate, 
cross tables, grouped df", { @@ -377,9 +377,9 @@ test_that("data_tabulate, cross tables, markdown", { efc$e16sex[sample.int(nrow(efc), 5)] <- NA expect_snapshot(print_md(data_tabulate(efc$c172code, by = efc$e16sex, proportions = "full"))) - expect_snapshot(print_md(data_tabulate(efc$c172code, by = efc$e16sex, proportions = "full", include_na = FALSE))) + expect_snapshot(print_md(data_tabulate(efc$c172code, by = efc$e16sex, proportions = "full", remove_na = TRUE))) expect_snapshot(print_md(data_tabulate(efc$c172code, by = efc$e16sex, proportions = "full", weights = efc$weights))) - expect_snapshot(print_md(data_tabulate(efc$c172code, by = efc$e16sex, proportions = "full", include_na = FALSE, weights = efc$weights))) # nolint + expect_snapshot(print_md(data_tabulate(efc$c172code, by = efc$e16sex, proportions = "full", remove_na = TRUE, weights = efc$weights))) # nolint }) @@ -389,12 +389,12 @@ test_that("data_tabulate, validate against table", { data(mtcars) # frequency table out1 <- as.data.frame(table(mtcars$cyl)) - out2 <- data_tabulate(mtcars$cyl, include_na = FALSE) + out2 <- data_tabulate(mtcars$cyl, remove_na = TRUE) expect_identical(out1$Freq, out2$N) # crosstable out1 <- data_arrange(as.data.frame(table(mtcars$cyl, mtcars$gear)), c("Var1", "Var2")) out2 <- data_rename(data_to_long( - as.data.frame(data_tabulate(mtcars$cyl, by = mtcars$gear, include_na = FALSE)), 2:4, + as.data.frame(data_tabulate(mtcars$cyl, by = mtcars$gear, remove_na = TRUE)), 2:4, names_to = "Var2", values_to = "Freq" ), "mtcars$cyl", "Var1") out1[[2]] <- as.character(out1[[2]])