Skip to content

Commit

Permalink
Define data frame method for dw_data_xtabulates object? (#517)
Browse files Browse the repository at this point in the history
* Define data frame method for `dw_data_xtabulates` object?
Fixes #516

* fix

* fix

* rename class attributes

* rename class attr

* news, version, docs

* add tests

* comply with S3 generic

* news

* split tests into smaller pieces

* typo

* add comment

* typo

* comment

* comment
  • Loading branch information
strengejacke authored Jun 22, 2024
1 parent 59d24e9 commit 218fbbb
Show file tree
Hide file tree
Showing 7 changed files with 230 additions and 38 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Type: Package
Package: datawizard
Title: Easy Data Wrangling and Statistical Transformations
Version: 0.11.0.2
Version: 0.11.0.3
Authors@R: c(
person("Indrajeet", "Patil", , "[email protected]", role = "aut",
comment = c(ORCID = "0000-0003-1995-6531", Twitter = "@patilindrajeets")),
Expand Down
28 changes: 15 additions & 13 deletions NAMESPACE
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
# Generated by roxygen2: do not edit by hand

S3method(as.data.frame,datawizard_crosstabs)
S3method(as.data.frame,datawizard_tables)
S3method(as.double,parameters_kurtosis)
S3method(as.double,parameters_skewness)
S3method(as.double,parameters_smoothness)
Expand Down Expand Up @@ -69,9 +71,9 @@ S3method(describe_distribution,grouped_df)
S3method(describe_distribution,list)
S3method(describe_distribution,numeric)
S3method(format,data_codebook)
S3method(format,datawizard_crosstab)
S3method(format,datawizard_table)
S3method(format,dw_data_peek)
S3method(format,dw_data_tabulate)
S3method(format,dw_data_xtabulate)
S3method(format,dw_groupmeans)
S3method(format,parameters_distribution)
S3method(kurtosis,data.frame)
Expand All @@ -93,12 +95,12 @@ S3method(normalize,numeric)
S3method(plot,visualisation_recipe)
S3method(print,data_codebook)
S3method(print,data_seek)
S3method(print,datawizard_crosstab)
S3method(print,datawizard_crosstabs)
S3method(print,datawizard_table)
S3method(print,datawizard_tables)
S3method(print,dw_data_peek)
S3method(print,dw_data_summary)
S3method(print,dw_data_tabulate)
S3method(print,dw_data_tabulates)
S3method(print,dw_data_xtabulate)
S3method(print,dw_data_xtabulates)
S3method(print,dw_groupmeans)
S3method(print,dw_groupmeans_list)
S3method(print,dw_transformer)
Expand All @@ -107,16 +109,16 @@ S3method(print,parameters_kurtosis)
S3method(print,parameters_skewness)
S3method(print,visualisation_recipe)
S3method(print_html,data_codebook)
S3method(print_html,datawizard_crosstab)
S3method(print_html,datawizard_crosstabs)
S3method(print_html,datawizard_table)
S3method(print_html,datawizard_tables)
S3method(print_html,dw_data_peek)
S3method(print_html,dw_data_tabulate)
S3method(print_html,dw_data_tabulates)
S3method(print_html,dw_data_xtabulate)
S3method(print_html,dw_data_xtabulates)
S3method(print_md,data_codebook)
S3method(print_md,datawizard_crosstab)
S3method(print_md,datawizard_table)
S3method(print_md,datawizard_tables)
S3method(print_md,dw_data_peek)
S3method(print_md,dw_data_tabulate)
S3method(print_md,dw_data_tabulates)
S3method(print_md,dw_data_xtabulate)
S3method(ranktransform,data.frame)
S3method(ranktransform,factor)
S3method(ranktransform,grouped_df)
Expand Down
13 changes: 12 additions & 1 deletion NEWS.md
Original file line number Diff line number Diff line change
@@ -1,10 +1,21 @@
# datawizard 0.11.0.1

## Changes
BREAKING CHANGES

* Class names for objects returned by `data_tabulate()` have been changed to
`datawizard_table` and `datawizard_crosstable` (resp. the plural forms,
`*_tables`), to provide a clearer and more consistent naming scheme.

CHANGES

* `data_select()` can directly rename selected variables when a named vector
is provided in `select`, e.g. `data_select(mtcars, c(new1 = "mpg", new2 = "cyl"))`.

* `data_tabulate()` gains an `as.data.frame()` method, to return the frequency
table as a data frame. The structure of the returned object is a nested data
frame, where the first column contains name of the variable for which
frequencies were calculated, and the second column contains the frequency table.

# datawizard 0.11.0

BREAKING CHANGES
Expand Down
93 changes: 81 additions & 12 deletions R/data_tabulate.R
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,13 @@
#' @param ... not used.
#' @inheritParams extract_column_names
#'
#' @details
#' There is an `as.data.frame()` method, to return the frequency tables as a
#' data frame. The structure of the returned object is a nested data frame,
#' where the first column contains name of the variable for which frequencies
#' were calculated, and the second column is a list column that contains the
#' frequency tables as data frame. See 'Examples'.
#'
#' @section Crosstables:
#' If `by` is supplied, a crosstable is created. The crosstable includes `<NA>`
#' (missing) values by default. The first column indicates values of `x`, the
Expand Down Expand Up @@ -108,6 +115,12 @@
#' # round percentages
#' out <- data_tabulate(efc, "c172code", by = "e16sex", proportions = "column")
#' print(out, digits = 0)
#'
#' # coerce to data frames
#' result <- data_tabulate(efc, "c172code", by = "e16sex")
#' as.data.frame(result)
#' as.data.frame(result)$table
#' as.data.frame(result, add_total = TRUE)$table
#' @export
data_tabulate <- function(x, ...) {
UseMethod("data_tabulate")
Expand Down Expand Up @@ -242,7 +255,7 @@ data_tabulate.default <- function(x,
attr(out, "total_n") <- sum(out$N, na.rm = TRUE)
attr(out, "valid_n") <- valid_n

class(out) <- c("dw_data_tabulate", "data.frame")
class(out) <- c("datawizard_table", "data.frame")

out
}
Expand Down Expand Up @@ -292,9 +305,9 @@ data_tabulate.data.frame <- function(x,
})

if (is.null(by)) {
class(out) <- c("dw_data_tabulates", "list")
class(out) <- c("datawizard_tables", "list")
} else {
class(out) <- c("dw_data_xtabulates", "list")
class(out) <- c("datawizard_crosstabs", "list")
}
attr(out, "collapse") <- isTRUE(collapse)
attr(out, "is_weighted") <- !is.null(weights)
Expand Down Expand Up @@ -357,9 +370,9 @@ data_tabulate.grouped_df <- function(x,
))
}
if (is.null(by)) {
class(out) <- c("dw_data_tabulates", "list")
class(out) <- c("datawizard_tables", "list")
} else {
class(out) <- c("dw_data_xtabulates", "list")
class(out) <- c("datawizard_crosstabs", "list")
}
attr(out, "collapse") <- isTRUE(collapse)
attr(out, "is_weighted") <- !is.null(weights)
Expand All @@ -380,8 +393,64 @@ insight::print_html
insight::print_md


#' @rdname data_tabulate
#' @param add_total For crosstables (i.e. when `by` is not `NULL`), a row and
#' column with the total N values are added to the data frame. `add_total` has
#' no effect in `as.data.frame()` for simple frequency tables.
#' @inheritParams base::as.data.frame
#' @export
as.data.frame.datawizard_tables <- function(x,
row.names = NULL,
optional = FALSE,
...,
stringsAsFactors = FALSE,
add_total = FALSE) {
# extract variables of frequencies
selected_vars <- unlist(lapply(x, function(i) attributes(i)$varname))
# coerce to data frame, remove rownames
data_frames <- lapply(x, function(i) {
# the `format()` methods for objects returned by `data_tabulate()` call
# `as.data.frame()` - we have to pay attention to avoid infinite iterations
# here. At the moment, this is no problem, as objects we have at this stage
# are of class "datawizard_table" or "datawizard_crosstab", while this
# `as.data.frame()` method is only called for "datawizard_tables" (the plural)
# form). Else, we would need to modify the class attribute here,
# e.g. class(i) <- "data.frame"
if (add_total) {
# to add the total column and row, we simply can call `format()`
out <- as.data.frame(format(i))
for (cols in 2:ncol(out)) {
# since "format()" returns a character matrix, we want to convert
# the columns to numeric. We have to exclude the first column, as the
# first column is character, due to the added "Total" value.
out[[cols]] <- as.numeric(out[[cols]])
}
# after formatting, we have a "separator" row for nicer printing.
# this should also be removed
out <- remove_empty_rows(out)
} else {
out <- as.data.frame(i)
}
rownames(out) <- NULL
out
})
# create nested data frame
result <- data.frame(
var = selected_vars,
table = I(data_frames),
stringsAsFactors = stringsAsFactors
)
# consider additional arguments
rownames(result) <- row.names
result
}

#' @export
as.data.frame.datawizard_crosstabs <- as.data.frame.datawizard_tables


#' @export
format.dw_data_tabulate <- function(x, format = "text", big_mark = NULL, ...) {
format.datawizard_table <- function(x, format = "text", big_mark = NULL, ...) {
# convert to character manually, else, for large numbers,
# format_table() returns scientific notation
x <- as.data.frame(x)
Expand Down Expand Up @@ -414,7 +483,7 @@ format.dw_data_tabulate <- function(x, format = "text", big_mark = NULL, ...) {


#' @export
print.dw_data_tabulate <- function(x, big_mark = NULL, ...) {
print.datawizard_table <- function(x, big_mark = NULL, ...) {
a <- attributes(x)

# "table" header with variable label/name, and type
Expand Down Expand Up @@ -456,7 +525,7 @@ print.dw_data_tabulate <- function(x, big_mark = NULL, ...) {


#' @export
print_html.dw_data_tabulate <- function(x, big_mark = NULL, ...) {
print_html.datawizard_table <- function(x, big_mark = NULL, ...) {
a <- attributes(x)

# "table" header with variable label/name, and type
Expand Down Expand Up @@ -486,7 +555,7 @@ print_html.dw_data_tabulate <- function(x, big_mark = NULL, ...) {


#' @export
print_md.dw_data_tabulate <- function(x, big_mark = NULL, ...) {
print_md.datawizard_table <- function(x, big_mark = NULL, ...) {
a <- attributes(x)

# "table" header with variable label/name, and type
Expand Down Expand Up @@ -516,7 +585,7 @@ print_md.dw_data_tabulate <- function(x, big_mark = NULL, ...) {


#' @export
print.dw_data_tabulates <- function(x, big_mark = NULL, ...) {
print.datawizard_tables <- function(x, big_mark = NULL, ...) {
# check if we have weights
is_weighted <- isTRUE(attributes(x)$is_weighted)

Expand Down Expand Up @@ -555,7 +624,7 @@ print.dw_data_tabulates <- function(x, big_mark = NULL, ...) {


#' @export
print_html.dw_data_tabulates <- function(x, big_mark = NULL, ...) {
print_html.datawizard_tables <- function(x, big_mark = NULL, ...) {
# check if we have weights
is_weighted <- isTRUE(attributes(x)$is_weighted)

Expand Down Expand Up @@ -584,7 +653,7 @@ print_html.dw_data_tabulates <- function(x, big_mark = NULL, ...) {


#' @export
print_md.dw_data_tabulates <- function(x, big_mark = NULL, ...) {
print_md.datawizard_tables <- function(x, big_mark = NULL, ...) {
# check if we have weights
is_weighted <- isTRUE(attributes(x)$is_weighted)

Expand Down
15 changes: 8 additions & 7 deletions R/data_xtabulate.R
Original file line number Diff line number Diff line change
Expand Up @@ -74,8 +74,9 @@
attr(out, "total_n") <- total_n
attr(out, "weights") <- weights
attr(out, "proportions") <- proportions
attr(out, "varname") <- obj_name

class(out) <- c("dw_data_xtabulate", "data.frame")
class(out) <- c("datawizard_crosstab", "data.frame")

out
}
Expand All @@ -85,7 +86,7 @@


#' @export
format.dw_data_xtabulate <- function(x, format = "text", digits = 1, big_mark = NULL, ...) {
format.datawizard_crosstab <- function(x, format = "text", digits = 1, big_mark = NULL, ...) {
# convert to character manually, else, for large numbers,
# format_table() returns scientific notation
x <- as.data.frame(x)
Expand Down Expand Up @@ -178,7 +179,7 @@ format.dw_data_xtabulate <- function(x, format = "text", digits = 1, big_mark =


#' @export
print.dw_data_xtabulate <- function(x, big_mark = NULL, ...) {
print.datawizard_crosstab <- function(x, big_mark = NULL, ...) {
# grouped data? if yes, add information on grouping factor
if (is.null(x[["Group"]])) {
caption <- NULL
Expand All @@ -200,7 +201,7 @@ print.dw_data_xtabulate <- function(x, big_mark = NULL, ...) {


#' @export
print_md.dw_data_xtabulate <- function(x, big_mark = NULL, ...) {
print_md.datawizard_crosstab <- function(x, big_mark = NULL, ...) {
# grouped data? if yes, add information on grouping factor
if (is.null(x[["Group"]])) {
caption <- NULL
Expand All @@ -222,7 +223,7 @@ print_md.dw_data_xtabulate <- function(x, big_mark = NULL, ...) {


#' @export
print_html.dw_data_xtabulate <- function(x, big_mark = NULL, ...) {
print_html.datawizard_crosstab <- function(x, big_mark = NULL, ...) {
# grouped data? if yes, add information on grouping factor
if (!is.null(x[["Group"]])) {
x$groups <- paste0("Grouped by ", x[["Group"]][1])
Expand All @@ -240,7 +241,7 @@ print_html.dw_data_xtabulate <- function(x, big_mark = NULL, ...) {


#' @export
print.dw_data_xtabulates <- function(x, big_mark = NULL, ...) {
print.datawizard_crosstabs <- function(x, big_mark = NULL, ...) {
for (i in seq_along(x)) {
print(x[[i]], big_mark = big_mark, ...)
cat("\n")
Expand All @@ -250,7 +251,7 @@ print.dw_data_xtabulates <- function(x, big_mark = NULL, ...) {


#' @export
print_html.dw_data_xtabulates <- function(x, big_mark = NULL, ...) {
print_html.datawizard_crosstabs <- function(x, big_mark = NULL, ...) {
if (length(x) == 1) {
print_html(x[[1]], big_mark = big_mark, ...)
} else {
Expand Down
Loading

0 comments on commit 218fbbb

Please sign in to comment.