Define data frame method for dw_data_xtabulates object? (#517)

* Define data frame method for `dw_data_xtabulates` object? Fixes #516 * fix * fix * rename class attributes * rename class attr * news, version, docs * add tests * comply with S3 generic * news * split tests into smaller pieces * typo * add comment * typo * comment * comment
easystats · Jun 22, 2024 · 218fbbb · 218fbbb
1 parent 59d24e9
commit 218fbbb
Show file tree

Hide file tree

Showing 7 changed files with 230 additions and 38 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,7 +1,7 @@
 Type: Package
 Package: datawizard
 Title: Easy Data Wrangling and Statistical Transformations
-Version: 0.11.0.2
+Version: 0.11.0.3
 Authors@R: c(
     person("Indrajeet", "Patil", , "[email protected]", role = "aut",
            comment = c(ORCID = "0000-0003-1995-6531", Twitter = "@patilindrajeets")),

diff --git a/NAMESPACE b/NAMESPACE
@@ -1,5 +1,7 @@
 # Generated by roxygen2: do not edit by hand
 
+S3method(as.data.frame,datawizard_crosstabs)
+S3method(as.data.frame,datawizard_tables)
 S3method(as.double,parameters_kurtosis)
 S3method(as.double,parameters_skewness)
 S3method(as.double,parameters_smoothness)
@@ -69,9 +71,9 @@ S3method(describe_distribution,grouped_df)
 S3method(describe_distribution,list)
 S3method(describe_distribution,numeric)
 S3method(format,data_codebook)
+S3method(format,datawizard_crosstab)
+S3method(format,datawizard_table)
 S3method(format,dw_data_peek)
-S3method(format,dw_data_tabulate)
-S3method(format,dw_data_xtabulate)
 S3method(format,dw_groupmeans)
 S3method(format,parameters_distribution)
 S3method(kurtosis,data.frame)
@@ -93,12 +95,12 @@ S3method(normalize,numeric)
 S3method(plot,visualisation_recipe)
 S3method(print,data_codebook)
 S3method(print,data_seek)
+S3method(print,datawizard_crosstab)
+S3method(print,datawizard_crosstabs)
+S3method(print,datawizard_table)
+S3method(print,datawizard_tables)
 S3method(print,dw_data_peek)
 S3method(print,dw_data_summary)
-S3method(print,dw_data_tabulate)
-S3method(print,dw_data_tabulates)
-S3method(print,dw_data_xtabulate)
-S3method(print,dw_data_xtabulates)
 S3method(print,dw_groupmeans)
 S3method(print,dw_groupmeans_list)
 S3method(print,dw_transformer)
@@ -107,16 +109,16 @@ S3method(print,parameters_kurtosis)
 S3method(print,parameters_skewness)
 S3method(print,visualisation_recipe)
 S3method(print_html,data_codebook)
+S3method(print_html,datawizard_crosstab)
+S3method(print_html,datawizard_crosstabs)
+S3method(print_html,datawizard_table)
+S3method(print_html,datawizard_tables)
 S3method(print_html,dw_data_peek)
-S3method(print_html,dw_data_tabulate)
-S3method(print_html,dw_data_tabulates)
-S3method(print_html,dw_data_xtabulate)
-S3method(print_html,dw_data_xtabulates)
 S3method(print_md,data_codebook)
+S3method(print_md,datawizard_crosstab)
+S3method(print_md,datawizard_table)
+S3method(print_md,datawizard_tables)
 S3method(print_md,dw_data_peek)
-S3method(print_md,dw_data_tabulate)
-S3method(print_md,dw_data_tabulates)
-S3method(print_md,dw_data_xtabulate)
 S3method(ranktransform,data.frame)
 S3method(ranktransform,factor)
 S3method(ranktransform,grouped_df)

diff --git a/NEWS.md b/NEWS.md
@@ -1,10 +1,21 @@
 # datawizard 0.11.0.1
 
-## Changes
+BREAKING CHANGES
+
+* Class names for objects returned by `data_tabulate()` have been changed to
+  `datawizard_table` and `datawizard_crosstable` (resp. the plural forms,
+  `*_tables`), to provide a clearer and more consistent naming scheme.
+
+CHANGES
 
 * `data_select()` can directly rename selected variables when a named vector
   is provided in `select`, e.g. `data_select(mtcars, c(new1 = "mpg", new2 = "cyl"))`.
 
+* `data_tabulate()` gains an `as.data.frame()` method, to return the frequency
+  table as a data frame. The structure of the returned object is a nested data
+  frame, where the first column contains name of the variable for which
+  frequencies were calculated, and the second column contains the frequency table.
+
 # datawizard 0.11.0
 
 BREAKING CHANGES

diff --git a/R/data_tabulate.R b/R/data_tabulate.R
@@ -28,6 +28,13 @@
 #' @param ... not used.
 #' @inheritParams extract_column_names
 #'
+#' @details
+#' There is an `as.data.frame()` method, to return the frequency tables as a
+#' data frame. The structure of the returned object is a nested data frame,
+#' where the first column contains name of the variable for which frequencies
+#' were calculated, and the second column is a list column that contains the
+#' frequency tables as data frame. See 'Examples'.
+#'
 #' @section Crosstables:
 #' If `by` is supplied, a crosstable is created. The crosstable includes `<NA>`
 #' (missing) values by default. The first column indicates values of `x`, the
@@ -108,6 +115,12 @@
 #' # round percentages
 #' out <- data_tabulate(efc, "c172code", by = "e16sex", proportions = "column")
 #' print(out, digits = 0)
+#'
+#' # coerce to data frames
+#' result <- data_tabulate(efc, "c172code", by = "e16sex")
+#' as.data.frame(result)
+#' as.data.frame(result)$table
+#' as.data.frame(result, add_total = TRUE)$table
 #' @export
 data_tabulate <- function(x, ...) {
   UseMethod("data_tabulate")
@@ -242,7 +255,7 @@ data_tabulate.default <- function(x,
   attr(out, "total_n") <- sum(out$N, na.rm = TRUE)
   attr(out, "valid_n") <- valid_n
 
-  class(out) <- c("dw_data_tabulate", "data.frame")
+  class(out) <- c("datawizard_table", "data.frame")
 
   out
 }
@@ -292,9 +305,9 @@ data_tabulate.data.frame <- function(x,
   })
 
   if (is.null(by)) {
-    class(out) <- c("dw_data_tabulates", "list")
+    class(out) <- c("datawizard_tables", "list")
   } else {
-    class(out) <- c("dw_data_xtabulates", "list")
+    class(out) <- c("datawizard_crosstabs", "list")
   }
   attr(out, "collapse") <- isTRUE(collapse)
   attr(out, "is_weighted") <- !is.null(weights)
@@ -357,9 +370,9 @@ data_tabulate.grouped_df <- function(x,
     ))
   }
   if (is.null(by)) {
-    class(out) <- c("dw_data_tabulates", "list")
+    class(out) <- c("datawizard_tables", "list")
   } else {
-    class(out) <- c("dw_data_xtabulates", "list")
+    class(out) <- c("datawizard_crosstabs", "list")
   }
   attr(out, "collapse") <- isTRUE(collapse)
   attr(out, "is_weighted") <- !is.null(weights)
@@ -380,8 +393,64 @@ insight::print_html
 insight::print_md
 
 
+#' @rdname data_tabulate
+#' @param add_total For crosstables (i.e. when `by` is not `NULL`), a row and
+#' column with the total N values are added to the data frame. `add_total` has
+#' no effect in `as.data.frame()` for simple frequency tables.
+#' @inheritParams base::as.data.frame
+#' @export
+as.data.frame.datawizard_tables <- function(x,
+                                            row.names = NULL,
+                                            optional = FALSE,
+                                            ...,
+                                            stringsAsFactors = FALSE,
+                                            add_total = FALSE) {
+  # extract variables of frequencies
+  selected_vars <- unlist(lapply(x, function(i) attributes(i)$varname))
+  # coerce to data frame, remove rownames
+  data_frames <- lapply(x, function(i) {
+    # the `format()` methods for objects returned by `data_tabulate()` call
+    # `as.data.frame()` - we have to pay attention to avoid infinite iterations
+    # here. At the moment, this is no problem, as objects we have at this stage
+    # are of class "datawizard_table" or "datawizard_crosstab", while this
+    # `as.data.frame()` method is only called for "datawizard_tables" (the plural)
+    # form). Else, we would need to modify the class attribute here,
+    # e.g. class(i) <- "data.frame"
+    if (add_total) {
+      # to add the total column and row, we simply can call `format()`
+      out <- as.data.frame(format(i))
+      for (cols in 2:ncol(out)) {
+        # since "format()" returns a character matrix, we want to convert
+        # the columns to numeric. We have to exclude the first column, as the
+        # first column is character, due to the added "Total" value.
+        out[[cols]] <- as.numeric(out[[cols]])
+      }
+      # after formatting, we have a "separator" row for nicer printing.
+      # this should also be removed
+      out <- remove_empty_rows(out)
+    } else {
+      out <- as.data.frame(i)
+    }
+    rownames(out) <- NULL
+    out
+  })
+  # create nested data frame
+  result <- data.frame(
+    var = selected_vars,
+    table = I(data_frames),
+    stringsAsFactors = stringsAsFactors
+  )
+  # consider additional arguments
+  rownames(result) <- row.names
+  result
+}
+
+#' @export
+as.data.frame.datawizard_crosstabs <- as.data.frame.datawizard_tables
+
+
 #' @export
-format.dw_data_tabulate <- function(x, format = "text", big_mark = NULL, ...) {
+format.datawizard_table <- function(x, format = "text", big_mark = NULL, ...) {
   # convert to character manually, else, for large numbers,
   # format_table() returns scientific notation
   x <- as.data.frame(x)
@@ -414,7 +483,7 @@ format.dw_data_tabulate <- function(x, format = "text", big_mark = NULL, ...) {
 
 
 #' @export
-print.dw_data_tabulate <- function(x, big_mark = NULL, ...) {
+print.datawizard_table <- function(x, big_mark = NULL, ...) {
   a <- attributes(x)
 
   # "table" header with variable label/name, and type
@@ -456,7 +525,7 @@ print.dw_data_tabulate <- function(x, big_mark = NULL, ...) {
 
 
 #' @export
-print_html.dw_data_tabulate <- function(x, big_mark = NULL, ...) {
+print_html.datawizard_table <- function(x, big_mark = NULL, ...) {
   a <- attributes(x)
 
   # "table" header with variable label/name, and type
@@ -486,7 +555,7 @@ print_html.dw_data_tabulate <- function(x, big_mark = NULL, ...) {
 
 
 #' @export
-print_md.dw_data_tabulate <- function(x, big_mark = NULL, ...) {
+print_md.datawizard_table <- function(x, big_mark = NULL, ...) {
   a <- attributes(x)
 
   # "table" header with variable label/name, and type
@@ -516,7 +585,7 @@ print_md.dw_data_tabulate <- function(x, big_mark = NULL, ...) {
 
 
 #' @export
-print.dw_data_tabulates <- function(x, big_mark = NULL, ...) {
+print.datawizard_tables <- function(x, big_mark = NULL, ...) {
   # check if we have weights
   is_weighted <- isTRUE(attributes(x)$is_weighted)
 
@@ -555,7 +624,7 @@ print.dw_data_tabulates <- function(x, big_mark = NULL, ...) {
 
 
 #' @export
-print_html.dw_data_tabulates <- function(x, big_mark = NULL, ...) {
+print_html.datawizard_tables <- function(x, big_mark = NULL, ...) {
   # check if we have weights
   is_weighted <- isTRUE(attributes(x)$is_weighted)
 
@@ -584,7 +653,7 @@ print_html.dw_data_tabulates <- function(x, big_mark = NULL, ...) {
 
 
 #' @export
-print_md.dw_data_tabulates <- function(x, big_mark = NULL, ...) {
+print_md.datawizard_tables <- function(x, big_mark = NULL, ...) {
   # check if we have weights
   is_weighted <- isTRUE(attributes(x)$is_weighted)
 

diff --git a/R/data_xtabulate.R b/R/data_xtabulate.R
@@ -74,8 +74,9 @@
   attr(out, "total_n") <- total_n
   attr(out, "weights") <- weights
   attr(out, "proportions") <- proportions
+  attr(out, "varname") <- obj_name
 
-  class(out) <- c("dw_data_xtabulate", "data.frame")
+  class(out) <- c("datawizard_crosstab", "data.frame")
 
   out
 }
@@ -85,7 +86,7 @@
 
 
 #' @export
-format.dw_data_xtabulate <- function(x, format = "text", digits = 1, big_mark = NULL, ...) {
+format.datawizard_crosstab <- function(x, format = "text", digits = 1, big_mark = NULL, ...) {
   # convert to character manually, else, for large numbers,
   # format_table() returns scientific notation
   x <- as.data.frame(x)
@@ -178,7 +179,7 @@ format.dw_data_xtabulate <- function(x, format = "text", digits = 1, big_mark =
 
 
 #' @export
-print.dw_data_xtabulate <- function(x, big_mark = NULL, ...) {
+print.datawizard_crosstab <- function(x, big_mark = NULL, ...) {
   # grouped data? if yes, add information on grouping factor
   if (is.null(x[["Group"]])) {
     caption <- NULL
@@ -200,7 +201,7 @@ print.dw_data_xtabulate <- function(x, big_mark = NULL, ...) {
 
 
 #' @export
-print_md.dw_data_xtabulate <- function(x, big_mark = NULL, ...) {
+print_md.datawizard_crosstab <- function(x, big_mark = NULL, ...) {
   # grouped data? if yes, add information on grouping factor
   if (is.null(x[["Group"]])) {
     caption <- NULL
@@ -222,7 +223,7 @@ print_md.dw_data_xtabulate <- function(x, big_mark = NULL, ...) {
 
 
 #' @export
-print_html.dw_data_xtabulate <- function(x, big_mark = NULL, ...) {
+print_html.datawizard_crosstab <- function(x, big_mark = NULL, ...) {
   # grouped data? if yes, add information on grouping factor
   if (!is.null(x[["Group"]])) {
     x$groups <- paste0("Grouped by ", x[["Group"]][1])
@@ -240,7 +241,7 @@ print_html.dw_data_xtabulate <- function(x, big_mark = NULL, ...) {
 
 
 #' @export
-print.dw_data_xtabulates <- function(x, big_mark = NULL, ...) {
+print.datawizard_crosstabs <- function(x, big_mark = NULL, ...) {
   for (i in seq_along(x)) {
     print(x[[i]], big_mark = big_mark, ...)
     cat("\n")
@@ -250,7 +251,7 @@ print.dw_data_xtabulates <- function(x, big_mark = NULL, ...) {
 
 
 #' @export
-print_html.dw_data_xtabulates <- function(x, big_mark = NULL, ...) {
+print_html.datawizard_crosstabs <- function(x, big_mark = NULL, ...) {
   if (length(x) == 1) {
     print_html(x[[1]], big_mark = big_mark, ...)
   } else {