From ebe48b4bd1a2cbf9f5f5a7cfcb6ba705f8750235 Mon Sep 17 00:00:00 2001
From: Daniel <mail@danielluedecke.de>
Date: Sun, 23 Jun 2024 17:54:07 +0200
Subject: [PATCH] Unexpected missing values in `data_tabulate()` (#518)

* Unexpected missing values in `data_tabulate()`
Fixes #514

* reverse usage

* fix tests (FALSE -> TRUE)

* docs, news

* Update NEWS.md

Co-authored-by: Indrajeet Patil <patilindrajeet.science@gmail.com>

* add  comment

---------

Co-authored-by: Indrajeet Patil <patilindrajeet.science@gmail.com>
---
 DESCRIPTION                            |  2 +-
 NEWS.md                                |  5 +++
 R/data_summary.R                       | 22 +++++-----
 R/data_tabulate.R                      | 56 ++++++++++++++------------
 R/data_xtabulate.R                     | 30 ++++++++------
 man/data_summary.Rd                    |  8 ++--
 man/data_tabulate.Rd                   | 12 +++---
 tests/testthat/_snaps/data_tabulate.md | 16 ++++----
 tests/testthat/test-data_summary.R     |  2 +-
 tests/testthat/test-data_tabulate.R    | 30 +++++++-------
 10 files changed, 98 insertions(+), 85 deletions(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index 0a7fb9594..c56e3f9dd 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,7 +1,7 @@
 Type: Package
 Package: datawizard
 Title: Easy Data Wrangling and Statistical Transformations
-Version: 0.11.0.3
+Version: 0.11.0.4
 Authors@R: c(
     person("Indrajeet", "Patil", , "patilindrajeet.science@gmail.com", role = "aut",
            comment = c(ORCID = "0000-0003-1995-6531", Twitter = "@patilindrajeets")),
diff --git a/NEWS.md b/NEWS.md
index ada1f08ea..0954f1214 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -2,6 +2,11 @@
 
 BREAKING CHANGES
 
+* The argument `include_na` in `data_tabulate()` and `data_summary()` has been
+  renamed into `remove_na`. Consequently, to mimic former behaviour, `FALSE` and
+  `TRUE` need to be switched (i.e. `remove_na = TRUE` is equivalent to the former
+  `include_na = FALSE`).
+
 * Class names for objects returned by `data_tabulate()` have been changed to
   `datawizard_table` and `datawizard_crosstable` (resp. the plural forms,
   `*_tables`), to provide a clearer and more consistent naming scheme.
diff --git a/R/data_summary.R b/R/data_summary.R
index 8d15f8483..7662d0c94 100644
--- a/R/data_summary.R
+++ b/R/data_summary.R
@@ -8,9 +8,9 @@
 #' @param by Optional character string, indicating the name of a variable in `x`.
 #' If supplied, the data will be split by this variable and summary statistics
 #' will be computed for each group.
-#' @param include_na Logical. If `TRUE`, missing values are included as a level
-#' in the grouping variable. If `FALSE`, missing values are omitted from the
-#' grouping variable.
+#' @param remove_na Logical. If `TRUE`, missing values are omitted from the
+#' grouping variable. If `FALSE` (default), missing values are included as a
+#' level in the grouping variable.
 #' @param ... One or more named expressions that define the new variable name
 #' and the function to compute the summary statistic. Example:
 #' `mean_sepal_width = mean(Sepal.Width)`. The expression can also be provided
@@ -57,8 +57,8 @@ data_summary <- function(x, ...) {
 
 
 #' @export
-data_summary.matrix <- function(x, ..., by = NULL, include_na = TRUE) {
-  data_summary(as.data.frame(x), ..., by = by, include_na = include_na)
+data_summary.matrix <- function(x, ..., by = NULL, remove_na = FALSE) {
+  data_summary(as.data.frame(x), ..., by = by, remove_na = remove_na)
 }
 
 
@@ -70,7 +70,7 @@ data_summary.default <- function(x, ...) {
 
 #' @rdname data_summary
 #' @export
-data_summary.data.frame <- function(x, ..., by = NULL, include_na = TRUE) {
+data_summary.data.frame <- function(x, ..., by = NULL, remove_na = FALSE) {
   dots <- eval(substitute(alist(...)))
 
   # do we have any expression at all?
@@ -103,10 +103,10 @@ data_summary.data.frame <- function(x, ..., by = NULL, include_na = TRUE) {
     }
     # split data, add NA levels, if requested
     l <- lapply(x[by], function(i) {
-      if (include_na && anyNA(i)) {
-        addNA(i)
-      } else {
+      if (remove_na || !anyNA(i)) {
         i
+      } else {
+        addNA(i)
       }
     })
     split_data <- split(x, l, drop = TRUE)
@@ -137,7 +137,7 @@ data_summary.data.frame <- function(x, ..., by = NULL, include_na = TRUE) {
 
 
 #' @export
-data_summary.grouped_df <- function(x, ..., by = NULL, include_na = TRUE) {
+data_summary.grouped_df <- function(x, ..., by = NULL, remove_na = FALSE) {
   # extract group variables
   grps <- attr(x, "groups", exact = TRUE)
   group_variables <- data_remove(grps, ".rows")
@@ -148,7 +148,7 @@ data_summary.grouped_df <- function(x, ..., by = NULL, include_na = TRUE) {
   # remove information specific to grouped df's
   attr(x, "groups") <- NULL
   class(x) <- "data.frame"
-  data_summary(x, ..., by = by, include_na = include_na)
+  data_summary(x, ..., by = by, remove_na = remove_na)
 }
 
 
diff --git a/R/data_tabulate.R b/R/data_tabulate.R
index 74f4f2e03..e94fc5d55 100644
--- a/R/data_tabulate.R
+++ b/R/data_tabulate.R
@@ -15,7 +15,7 @@
 #' factor levels are dropped from the frequency table.
 #' @param name Optional character string, which includes the name that is used
 #' for printing.
-#' @param include_na Logical, if `TRUE`, missing values are included in the
+#' @param remove_na Logical, if `FALSE`, missing values are included in the
 #' frequency or crosstable, else missing values are omitted.
 #' @param collapse Logical, if `TRUE` collapses multiple tables into one larger
 #' table for printing. This affects only printing, not the returned object.
@@ -40,7 +40,7 @@
 #' (missing) values by default. The first column indicates values of `x`, the
 #' first row indicates values of `by` (including missing values). The last row
 #' and column contain the total frequencies for each row and column, respectively.
-#' Setting `include_na = FALSE` will omit missing values from the crosstable.
+#' Setting `remove_na = FALSE` will omit missing values from the crosstable.
 #' Setting `proportions` to `"row"` or `"column"` will add row or column
 #' percentages. Setting `proportions` to `"full"` will add relative frequencies
 #' for the full table.
@@ -62,7 +62,7 @@
 #' data_tabulate(efc$c172code)
 #'
 #' # drop missing values
-#' data_tabulate(efc$c172code, include_na = FALSE)
+#' data_tabulate(efc$c172code, remove_na = TRUE)
 #'
 #' # data frame
 #' data_tabulate(efc, c("e42dep", "c172code"))
@@ -109,7 +109,7 @@
 #'   efc$c172code,
 #'   by = efc$e16sex,
 #'   proportions = "column",
-#'   include_na = FALSE
+#'   remove_na = TRUE
 #' )
 #'
 #' # round percentages
@@ -133,7 +133,7 @@ data_tabulate.default <- function(x,
                                   by = NULL,
                                   drop_levels = FALSE,
                                   weights = NULL,
-                                  include_na = TRUE,
+                                  remove_na = FALSE,
                                   proportions = NULL,
                                   name = NULL,
                                   verbose = TRUE,
@@ -163,7 +163,7 @@ data_tabulate.default <- function(x,
       x,
       by = by,
       weights = weights,
-      include_na = include_na,
+      remove_na = remove_na,
       proportions = proportions,
       obj_name = obj_name,
       group_variable = group_variable
@@ -172,30 +172,34 @@ data_tabulate.default <- function(x,
 
   # frequency table
   if (is.null(weights)) {
-    if (include_na) {
-      freq_table <- tryCatch(table(addNA(x)), error = function(e) NULL)
-    } else {
+    if (remove_na) {
+      # we have a `.default` and a `.data.frame` method for `data_tabulate()`.
+      # since this is the default, `x` can be an object which cannot be used
+      # with `table()`, that's why we add `tryCatch()` here. Below we give an
+      # informative error message for non-supported objects.
       freq_table <- tryCatch(table(x), error = function(e) NULL)
+    } else {
+      freq_table <- tryCatch(table(addNA(x)), error = function(e) NULL)
     }
-  } else if (include_na) {
-    # weighted frequency table, including NA
+  } else if (remove_na) {
+    # weighted frequency table, excluding NA
     freq_table <- tryCatch(
       stats::xtabs(
         weights ~ x,
-        data = data.frame(weights = weights, x = addNA(x)),
-        na.action = stats::na.pass,
-        addNA = TRUE
+        data = data.frame(weights = weights, x = x),
+        na.action = stats::na.omit,
+        addNA = FALSE
       ),
       error = function(e) NULL
     )
   } else {
-    # weighted frequency table, excluding NA
+    # weighted frequency table, including NA
     freq_table <- tryCatch(
       stats::xtabs(
         weights ~ x,
-        data = data.frame(weights = weights, x = x),
-        na.action = stats::na.omit,
-        addNA = FALSE
+        data = data.frame(weights = weights, x = addNA(x)),
+        na.action = stats::na.pass,
+        addNA = TRUE
       ),
       error = function(e) NULL
     )
@@ -218,12 +222,12 @@ data_tabulate.default <- function(x,
 
   out$`Raw %` <- 100 * out$N / sum(out$N)
   # if we have missing values, we add a row with NA
-  if (include_na) {
-    out$`Valid %` <- c(100 * out$N[-nrow(out)] / sum(out$N[-nrow(out)]), NA)
-    valid_n <- sum(out$N[-length(out$N)], na.rm = TRUE)
-  } else {
+  if (remove_na) {
     out$`Valid %` <- 100 * out$N / sum(out$N)
     valid_n <- sum(out$N, na.rm = TRUE)
+  } else {
+    out$`Valid %` <- c(100 * out$N[-nrow(out)] / sum(out$N[-nrow(out)]), NA)
+    valid_n <- sum(out$N[-length(out$N)], na.rm = TRUE)
   }
   out$`Cumulative %` <- cumsum(out$`Valid %`)
 
@@ -271,7 +275,7 @@ data_tabulate.data.frame <- function(x,
                                      by = NULL,
                                      drop_levels = FALSE,
                                      weights = NULL,
-                                     include_na = TRUE,
+                                     remove_na = FALSE,
                                      proportions = NULL,
                                      collapse = FALSE,
                                      verbose = TRUE,
@@ -297,7 +301,7 @@ data_tabulate.data.frame <- function(x,
       proportions = proportions,
       drop_levels = drop_levels,
       weights = weights,
-      include_na = include_na,
+      remove_na = remove_na,
       name = i,
       verbose = verbose,
       ...
@@ -326,7 +330,7 @@ data_tabulate.grouped_df <- function(x,
                                      proportions = NULL,
                                      drop_levels = FALSE,
                                      weights = NULL,
-                                     include_na = TRUE,
+                                     remove_na = FALSE,
                                      collapse = FALSE,
                                      verbose = TRUE,
                                      ...) {
@@ -362,7 +366,7 @@ data_tabulate.grouped_df <- function(x,
       verbose = verbose,
       drop_levels = drop_levels,
       weights = weights,
-      include_na = include_na,
+      remove_na = remove_na,
       by = by,
       proportions = proportions,
       group_variable = group_variable,
diff --git a/R/data_xtabulate.R b/R/data_xtabulate.R
index 5c387ff95..08be1eeca 100644
--- a/R/data_xtabulate.R
+++ b/R/data_xtabulate.R
@@ -3,7 +3,7 @@
 .crosstable <- function(x,
                         by,
                         weights = NULL,
-                        include_na = TRUE,
+                        remove_na = FALSE,
                         proportions = NULL,
                         obj_name = NULL,
                         group_variable = NULL) {
@@ -12,30 +12,34 @@
   }
   # frequency table
   if (is.null(weights)) {
-    if (include_na) {
-      x_table <- tryCatch(table(addNA(x), addNA(by)), error = function(e) NULL)
-    } else {
+    # we have a `.default` and a `.data.frame` method for `data_tabulate()`.
+    # since this is the default, `x` can be an object which cannot be used
+    # with `table()`, that's why we add `tryCatch()` here. Below we give an
+    # informative error message for non-supported objects.
+    if (remove_na) {
       x_table <- tryCatch(table(x, by), error = function(e) NULL)
+    } else {
+      x_table <- tryCatch(table(addNA(x), addNA(by)), error = function(e) NULL)
     }
-  } else if (include_na) {
-    # weighted frequency table, including NA
+  } else if (remove_na) {
+    # weighted frequency table, excluding NA
     x_table <- tryCatch(
       stats::xtabs(
         weights ~ x + by,
-        data = data.frame(weights = weights, x = addNA(x), by = addNA(by)),
-        na.action = stats::na.pass,
-        addNA = TRUE
+        data = data.frame(weights = weights, x = x, by = by),
+        na.action = stats::na.omit,
+        addNA = FALSE
       ),
       error = function(e) NULL
     )
   } else {
-    # weighted frequency table, excluding NA
+    # weighted frequency table, including NA
     x_table <- tryCatch(
       stats::xtabs(
         weights ~ x + by,
-        data = data.frame(weights = weights, x = x, by = by),
-        na.action = stats::na.omit,
-        addNA = FALSE
+        data = data.frame(weights = weights, x = addNA(x), by = addNA(by)),
+        na.action = stats::na.pass,
+        addNA = TRUE
       ),
       error = function(e) NULL
     )
diff --git a/man/data_summary.Rd b/man/data_summary.Rd
index ccbf4c524..24cfa1a9f 100644
--- a/man/data_summary.Rd
+++ b/man/data_summary.Rd
@@ -7,7 +7,7 @@
 \usage{
 data_summary(x, ...)
 
-\method{data_summary}{data.frame}(x, ..., by = NULL, include_na = TRUE)
+\method{data_summary}{data.frame}(x, ..., by = NULL, remove_na = FALSE)
 }
 \arguments{
 \item{x}{A (grouped) data frame.}
@@ -22,9 +22,9 @@ summary function \code{n()} can be used to count the number of observations.}
 If supplied, the data will be split by this variable and summary statistics
 will be computed for each group.}
 
-\item{include_na}{Logical. If \code{TRUE}, missing values are included as a level
-in the grouping variable. If \code{FALSE}, missing values are omitted from the
-grouping variable.}
+\item{remove_na}{Logical. If \code{TRUE}, missing values are omitted from the
+grouping variable. If \code{FALSE} (default), missing values are included as a
+level in the grouping variable.}
 }
 \value{
 A data frame with the requested summary statistics.
diff --git a/man/data_tabulate.Rd b/man/data_tabulate.Rd
index 3f17bb21c..2feadf3a9 100644
--- a/man/data_tabulate.Rd
+++ b/man/data_tabulate.Rd
@@ -14,7 +14,7 @@ data_tabulate(x, ...)
   by = NULL,
   drop_levels = FALSE,
   weights = NULL,
-  include_na = TRUE,
+  remove_na = FALSE,
   proportions = NULL,
   name = NULL,
   verbose = TRUE,
@@ -30,7 +30,7 @@ data_tabulate(x, ...)
   by = NULL,
   drop_levels = FALSE,
   weights = NULL,
-  include_na = TRUE,
+  remove_na = FALSE,
   proportions = NULL,
   collapse = FALSE,
   verbose = TRUE,
@@ -62,7 +62,7 @@ factor levels are dropped from the frequency table.}
 \item{weights}{Optional numeric vector of weights. Must be of the same length
 as \code{x}. If \code{weights} is supplied, weighted frequencies are calculated.}
 
-\item{include_na}{Logical, if \code{TRUE}, missing values are included in the
+\item{remove_na}{Logical, if \code{FALSE}, missing values are included in the
 frequency or crosstable, else missing values are omitted.}
 
 \item{proportions}{Optional character string, indicating the type of
@@ -173,7 +173,7 @@ If \code{by} is supplied, a crosstable is created. The crosstable includes \verb
 (missing) values by default. The first column indicates values of \code{x}, the
 first row indicates values of \code{by} (including missing values). The last row
 and column contain the total frequencies for each row and column, respectively.
-Setting \code{include_na = FALSE} will omit missing values from the crosstable.
+Setting \code{remove_na = FALSE} will omit missing values from the crosstable.
 Setting \code{proportions} to \code{"row"} or \code{"column"} will add row or column
 percentages. Setting \code{proportions} to \code{"full"} will add relative frequencies
 for the full table.
@@ -189,7 +189,7 @@ data(efc)
 data_tabulate(efc$c172code)
 
 # drop missing values
-data_tabulate(efc$c172code, include_na = FALSE)
+data_tabulate(efc$c172code, remove_na = TRUE)
 
 # data frame
 data_tabulate(efc, c("e42dep", "c172code"))
@@ -236,7 +236,7 @@ data_tabulate(
   efc$c172code,
   by = efc$e16sex,
   proportions = "column",
-  include_na = FALSE
+  remove_na = TRUE
 )
 
 # round percentages
diff --git a/tests/testthat/_snaps/data_tabulate.md b/tests/testthat/_snaps/data_tabulate.md
index 59a20dc01..ffde63088 100644
--- a/tests/testthat/_snaps/data_tabulate.md
+++ b/tests/testthat/_snaps/data_tabulate.md
@@ -259,7 +259,7 @@
 
     Code
       print(data_tabulate(efc$c172code, by = efc$e16sex, proportions = "full",
-      include_na = FALSE))
+      remove_na = TRUE))
     Output
       efc$c172code |       male |     female | Total
       -------------+------------+------------+------
@@ -288,7 +288,7 @@
 
     Code
       print(data_tabulate(efc$c172code, by = efc$e16sex, proportions = "full",
-      include_na = FALSE, weights = efc$weights))
+      remove_na = TRUE, weights = efc$weights))
     Output
       efc$c172code |       male |     female | Total
       -------------+------------+------------+------
@@ -317,7 +317,7 @@
 
     Code
       print(data_tabulate(efc, "c172code", by = efc$e16sex, proportions = "row",
-      include_na = FALSE))
+      remove_na = TRUE))
     Output
       c172code |       male |     female | Total
       ---------+------------+------------+------
@@ -348,7 +348,7 @@
 
     Code
       print(data_tabulate(efc, "c172code", by = efc$e16sex, proportions = "row",
-      include_na = FALSE, weights = efc$weights))
+      remove_na = TRUE, weights = efc$weights))
     Output
       c172code |       male |     female | Total
       ---------+------------+------------+------
@@ -378,7 +378,7 @@
 
     Code
       print(data_tabulate(efc, "c172code", by = "e16sex", proportions = "column",
-        include_na = FALSE))
+        remove_na = TRUE))
     Output
       c172code |       male |     female | Total
       ---------+------------+------------+------
@@ -409,7 +409,7 @@
 
     Code
       print(data_tabulate(efc, "c172code", by = "e16sex", proportions = "column",
-        include_na = FALSE, weights = "weights"))
+        remove_na = TRUE, weights = "weights"))
     Output
       c172code |       male |     female | Total
       ---------+------------+------------+------
@@ -497,7 +497,7 @@
 
     Code
       print_md(data_tabulate(efc$c172code, by = efc$e16sex, proportions = "full",
-      include_na = FALSE))
+      remove_na = TRUE))
     Output
       [1] "|efc$c172code |       male|     female| Total|"
       [2] "|:------------|----------:|----------:|-----:|"
@@ -534,7 +534,7 @@
 
     Code
       print_md(data_tabulate(efc$c172code, by = efc$e16sex, proportions = "full",
-      include_na = FALSE, weights = efc$weights))
+      remove_na = TRUE, weights = efc$weights))
     Output
       [1] "|efc$c172code |       male|     female| Total|"
       [2] "|:------------|----------:|----------:|-----:|"
diff --git a/tests/testthat/test-data_summary.R b/tests/testthat/test-data_summary.R
index 746d4c51a..c60b142d2 100644
--- a/tests/testthat/test-data_summary.R
+++ b/tests/testthat/test-data_summary.R
@@ -175,7 +175,7 @@ test_that("data_summary, with NA", {
   data(efc, package = "datawizard")
   out <- data_summary(efc, MW = mean(c12hour, na.rm = TRUE), by = "c172code")
   expect_snapshot(print(out))
-  out <- data_summary(efc, MW = mean(c12hour, na.rm = TRUE), by = "c172code", include_na = FALSE)
+  out <- data_summary(efc, MW = mean(c12hour, na.rm = TRUE), by = "c172code", remove_na = TRUE)
   expect_snapshot(print(out))
   # sorting for multiple groups
   out <- data_summary(efc, MW = mean(c12hour, na.rm = TRUE), by = c("e42dep", "c172code"))
diff --git a/tests/testthat/test-data_tabulate.R b/tests/testthat/test-data_tabulate.R
index 5108e29c2..9848d42b9 100644
--- a/tests/testthat/test-data_tabulate.R
+++ b/tests/testthat/test-data_tabulate.R
@@ -287,11 +287,11 @@ test_that("data_tabulate exclude/include missing values", {
   efc$e16sex[sample.int(nrow(efc), 5)] <- NA
   out <- data_tabulate(efc$c172code)
   expect_identical(out$N, c(8L, 66L, 16L, 10L))
-  out <- data_tabulate(efc$c172code, include_na = FALSE)
+  out <- data_tabulate(efc$c172code, remove_na = TRUE)
   expect_identical(out$N, c(8L, 66L, 16L))
   out <- data_tabulate(efc$c172code, weights = efc$weights)
   expect_identical(out$N, c(10, 67, 15, 13))
-  out <- data_tabulate(efc$c172code, include_na = FALSE, weights = efc$weights)
+  out <- data_tabulate(efc$c172code, remove_na = TRUE, weights = efc$weights)
   expect_identical(out$N, c(10, 67, 15))
 })
 
@@ -305,17 +305,17 @@ test_that("data_tabulate, cross tables", {
   efc$e16sex[sample.int(nrow(efc), 5)] <- NA
 
   expect_snapshot(print(data_tabulate(efc$c172code, by = efc$e16sex, proportions = "full")))
-  expect_snapshot(print(data_tabulate(efc$c172code, by = efc$e16sex, proportions = "full", include_na = FALSE)))
+  expect_snapshot(print(data_tabulate(efc$c172code, by = efc$e16sex, proportions = "full", remove_na = TRUE)))
   expect_snapshot(print(data_tabulate(efc$c172code, by = efc$e16sex, proportions = "full", weights = efc$weights)))
-  expect_snapshot(print(data_tabulate(efc$c172code, by = efc$e16sex, proportions = "full", include_na = FALSE, weights = efc$weights))) # nolint
+  expect_snapshot(print(data_tabulate(efc$c172code, by = efc$e16sex, proportions = "full", remove_na = TRUE, weights = efc$weights))) # nolint
   expect_snapshot(print(data_tabulate(efc, "c172code", by = efc$e16sex, proportions = "row")))
-  expect_snapshot(print(data_tabulate(efc, "c172code", by = efc$e16sex, proportions = "row", include_na = FALSE)))
+  expect_snapshot(print(data_tabulate(efc, "c172code", by = efc$e16sex, proportions = "row", remove_na = TRUE)))
   expect_snapshot(print(data_tabulate(efc, "c172code", by = efc$e16sex, proportions = "row", weights = efc$weights)))
-  expect_snapshot(print(data_tabulate(efc, "c172code", by = efc$e16sex, proportions = "row", include_na = FALSE, weights = efc$weights))) # nolint
+  expect_snapshot(print(data_tabulate(efc, "c172code", by = efc$e16sex, proportions = "row", remove_na = TRUE, weights = efc$weights))) # nolint
   expect_snapshot(print(data_tabulate(efc, "c172code", by = "e16sex", proportions = "column")))
-  expect_snapshot(print(data_tabulate(efc, "c172code", by = "e16sex", proportions = "column", include_na = FALSE)))
+  expect_snapshot(print(data_tabulate(efc, "c172code", by = "e16sex", proportions = "column", remove_na = TRUE)))
   expect_snapshot(print(data_tabulate(efc, "c172code", by = "e16sex", proportions = "column", weights = "weights")))
-  expect_snapshot(print(data_tabulate(efc, "c172code", by = "e16sex", proportions = "column", include_na = FALSE, weights = "weights"))) # nolint
+  expect_snapshot(print(data_tabulate(efc, "c172code", by = "e16sex", proportions = "column", remove_na = TRUE, weights = "weights"))) # nolint
 })
 
 test_that("data_tabulate, cross tables, HTML", {
@@ -326,11 +326,11 @@ test_that("data_tabulate, cross tables, HTML", {
   efc$e16sex[sample.int(nrow(efc), 5)] <- NA
 
   expect_s3_class(print_html(data_tabulate(efc$c172code, by = efc$e16sex, proportions = "full")), "gt_tbl")
-  expect_s3_class(print_html(data_tabulate(efc$c172code, by = efc$e16sex, proportions = "full", include_na = FALSE)), "gt_tbl") # nolint
+  expect_s3_class(print_html(data_tabulate(efc$c172code, by = efc$e16sex, proportions = "full", remove_na = TRUE)), "gt_tbl") # nolint
   expect_s3_class(print_html(data_tabulate(efc$c172code, by = efc$e16sex, proportions = "full", weights = efc$weights)), "gt_tbl") # nolint
-  expect_s3_class(print_html(data_tabulate(efc$c172code, by = efc$e16sex, proportions = "full", include_na = FALSE, weights = efc$weights)), "gt_tbl") # nolint
+  expect_s3_class(print_html(data_tabulate(efc$c172code, by = efc$e16sex, proportions = "full", remove_na = TRUE, weights = efc$weights)), "gt_tbl") # nolint
   expect_s3_class(print_html(data_tabulate(efc, "c172code", by = efc$e16sex, proportions = "row")), "gt_tbl")
-  expect_s3_class(print_html(data_tabulate(efc, "c172code", by = efc$e16sex, proportions = "row", include_na = FALSE, weights = efc$weights)), "gt_tbl") # nolint
+  expect_s3_class(print_html(data_tabulate(efc, "c172code", by = efc$e16sex, proportions = "row", remove_na = TRUE, weights = efc$weights)), "gt_tbl") # nolint
 })
 
 test_that("data_tabulate, cross tables, grouped df", {
@@ -377,9 +377,9 @@ test_that("data_tabulate, cross tables, markdown", {
   efc$e16sex[sample.int(nrow(efc), 5)] <- NA
 
   expect_snapshot(print_md(data_tabulate(efc$c172code, by = efc$e16sex, proportions = "full")))
-  expect_snapshot(print_md(data_tabulate(efc$c172code, by = efc$e16sex, proportions = "full", include_na = FALSE)))
+  expect_snapshot(print_md(data_tabulate(efc$c172code, by = efc$e16sex, proportions = "full", remove_na = TRUE)))
   expect_snapshot(print_md(data_tabulate(efc$c172code, by = efc$e16sex, proportions = "full", weights = efc$weights)))
-  expect_snapshot(print_md(data_tabulate(efc$c172code, by = efc$e16sex, proportions = "full", include_na = FALSE, weights = efc$weights))) # nolint
+  expect_snapshot(print_md(data_tabulate(efc$c172code, by = efc$e16sex, proportions = "full", remove_na = TRUE, weights = efc$weights))) # nolint
 })
 
 
@@ -389,12 +389,12 @@ test_that("data_tabulate, validate against table", {
   data(mtcars)
   # frequency table
   out1 <- as.data.frame(table(mtcars$cyl))
-  out2 <- data_tabulate(mtcars$cyl, include_na = FALSE)
+  out2 <- data_tabulate(mtcars$cyl, remove_na = TRUE)
   expect_identical(out1$Freq, out2$N)
   # crosstable
   out1 <- data_arrange(as.data.frame(table(mtcars$cyl, mtcars$gear)), c("Var1", "Var2"))
   out2 <- data_rename(data_to_long(
-    as.data.frame(data_tabulate(mtcars$cyl, by = mtcars$gear, include_na = FALSE)), 2:4,
+    as.data.frame(data_tabulate(mtcars$cyl, by = mtcars$gear, remove_na = TRUE)), 2:4,
     names_to = "Var2", values_to = "Freq"
   ), "mtcars$cyl", "Var1")
   out1[[2]] <- as.character(out1[[2]])