Rename rowmean_n() to row_means() (#448)

* Rename `rowmean_n()`? Fixes #447 * Update row_means.R * fix * fix * tests * docs * update pkgdown * fix tests * docs * Update NEWS.md Co-authored-by: Etienne Bacher <[email protected]> * version bump --------- Co-authored-by: Etienne Bacher <[email protected]>
easystats · Sep 7, 2023 · 877c587 · 877c587
1 parent 10599b2
commit 877c587
Show file tree

Hide file tree

Showing 10 changed files with 314 additions and 205 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,7 +1,7 @@
 Type: Package
 Package: datawizard
 Title: Easy Data Wrangling and Statistical Transformations
-Version: 0.8.0.8
+Version: 0.8.0.9
 Authors@R: c(
     person("Indrajeet", "Patil", , "[email protected]", role = "aut",
            comment = c(ORCID = "0000-0003-1995-6531", Twitter = "@patilindrajeets")),

diff --git a/NAMESPACE b/NAMESPACE
@@ -278,9 +278,9 @@ export(reshape_longer)
 export(reshape_wider)
 export(reverse)
 export(reverse_scale)
+export(row_means)
 export(row_to_colnames)
 export(rowid_as_column)
-export(rowmean_n)
 export(rownames_as_column)
 export(skewness)
 export(slide)

diff --git a/NEWS.md b/NEWS.md
@@ -2,10 +2,10 @@
 
 NEW FUNCTIONS
 
-* `contr.deviation()` for sum-deviation contrast coding of factors.
+* `row_means()`, to compute row means, optionally only for the rows with at
+  least `min_valid` non-missing values.
 
-* `rowmean_n()`, to compute row means if row contains at least `n` non-missing
-  values.
+* `contr.deviation()` for sum-deviation contrast coding of factors.
 
 * `means_by_group()`, to compute mean values of variables, grouped by levels
   of specified factors.

diff --git a/R/row_means.R b/R/row_means.R
@@ -0,0 +1,139 @@
+#' @title Row means (optionally with minimum amount of valid values)
+#' @name row_means
+#' @description This function is similar to the SPSS `MEAN.n` function and computes
+#' row means from a data frame or matrix if at least `min_valid` values of a row are
+#' valid (and not `NA`).
+#'
+#' @param data A data frame with at least two columns, where row means are applied.
+#' @param min_valid Optional, a numeric value of length 1. May either be
+#' - a numeric value that indicates the amount of valid values per row to
+#'   calculate the row mean;
+#' - or a value between 0 and 1, indicating a proportion of valid values per
+#'   row to calculate the row mean (see 'Details').
+#' - or `NULL` (default), in which case all rows are considered.
+#'
+#' If a row's number of valid values is less than `min_valid`, `NA` will be returned.
+#' @param digits Numeric value indicating the number of decimal places to be
+#' used for rounding mean values. Negative values are allowed (see 'Details').
+#' By default, `digits = NULL` and no rounding is used.
+#' @param remove_na Logical, if `TRUE`, removes missing (`NA`) values before
+#' calculating row means (default is `FALSE`). Only applies if `min_valid` is not specified.
+#' @param verbose Toggle warnings.
+#' @inheritParams find_columns
+#'
+#' @return A vector with row means for those rows with at least `min_valid` valid values.
+#'
+#' @details Rounding to a negative number of `digits` means rounding to a power of
+#' ten, for example `row_means(df, 3, digits = -2)` rounds to the nearest hundred.
+#' For `min_valid`, if not `NULL`, `min_valid` must be a numeric value from `0`
+#' to `ncol(data)`. If a row in the data frame has at least `min_valid`
+#' non-missing values, the row mean is returned. If `min_valid` is a non-integer
+#' value from 0 to 1, `min_valid` is considered to indicate the proportion of
+#' required non-missing values per row. E.g., if `min_valid = 0.75`, a row must
+#' have at least `ncol(data) * min_valid` non-missing values for the row mean
+#' to be calculated. See 'Examples'.
+#'
+#' @examples
+#' dat <- data.frame(
+#'   c1 = c(1, 2, NA, 4),
+#'   c2 = c(NA, 2, NA, 5),
+#'   c3 = c(NA, 4, NA, NA),
+#'   c4 = c(2, 3, 7, 8)
+#' )
+#'
+#' # default: a row mean is only returned for rows without any NA values
+#' row_means(dat)
+#'
+#' # remove all NA before computing row means
+#' row_means(dat, remove_na = TRUE)
+#'
+#' # needs at least 4 non-missing values per row
+#' row_means(dat, min_valid = 4) # 1 valid return value
+#'
+#' # needs at least 3 non-missing values per row
+#' row_means(dat, min_valid = 3) # 2 valid return values
+#'
+#' # needs at least 2 non-missing values per row
+#' row_means(dat, min_valid = 2)
+#'
+#' # needs at least 1 non-missing value per row, for two selected variables
+#' row_means(dat, select = c("c1", "c3"), min_valid = 1)
+#'
+#' # needs at least 50% of non-missing values per row
+#' row_means(dat, min_valid = 0.5) # 3 valid return values
+#'
+#' # needs at least 75% of non-missing values per row
+#' row_means(dat, min_valid = 0.75) # 2 valid return values
+#'
+#' @export
+row_means <- function(data,
+                      select = NULL,
+                      exclude = NULL,
+                      min_valid = NULL,
+                      digits = NULL,
+                      ignore_case = FALSE,
+                      regex = FALSE,
+                      remove_na = FALSE,
+                      verbose = TRUE) {
+  # evaluate arguments
+  select <- .select_nse(select,
+    data,
+    exclude,
+    ignore_case = ignore_case,
+    regex = regex,
+    verbose = verbose
+  )
+
+  if (is.null(select) || length(select) == 0) {
+    insight::format_error("No columns selected.")
+  }
+
+  data <- .coerce_to_dataframe(data[select])
+
+  # `min_valid`, if given, must be a numeric, non-missing value of length 1
+  if (!is.null(min_valid) && (all(is.na(min_valid)) || !is.numeric(min_valid) || length(min_valid) > 1)) {
+    insight::format_error("`min_valid` must be a numeric value of length 1.")
+  }
+
+  # make sure we only have numeric values
+  numeric_columns <- vapply(data, is.numeric, TRUE)
+  if (!all(numeric_columns)) {
+    if (verbose) {
+      insight::format_alert("Only numeric columns are considered for calculation.")
+    }
+    data <- data[numeric_columns]
+  }
+
+  # check if we have a data frame with at least two columns
+  if (ncol(data) < 2) {
+    insight::format_error("`data` must be a data frame with at least two numeric columns.")
+  }
+
+  # proceed here if min_valid is not NULL
+  if (!is.null(min_valid)) {
+    # fractional part => proportion of columns (NOTE(review): 2.5 is handled like 0.5)
+    decimals <- min_valid %% 1
+    if (decimals != 0) {
+      min_valid <- round(ncol(data) * decimals)
+    }
+
+    # min_valid must not be larger than the number of columns in the data frame
+    if (ncol(data) < min_valid) {
+      insight::format_error("`min_valid` must be smaller or equal to number of columns in data frame.")
+    }
+
+    # row means; rows with fewer than `min_valid` non-missing values are set to NA
+    to_na <- rowSums(is.na(data)) > ncol(data) - min_valid
+    out <- rowMeans(data, na.rm = TRUE)
+    out[to_na] <- NA
+  } else {
+    out <- rowMeans(data, na.rm = remove_na)
+  }
+
+  # round, if requested
+  if (!is.null(digits) && !all(is.na(digits))) {
+    out <- round(out, digits = digits)
+  }
+
+  out
+}
diff --git a/R/rowmean_n.R b/R/rowmean_n.R
diff --git a/_pkgdown.yaml b/_pkgdown.yaml
@@ -68,8 +68,8 @@ reference:
       - kurtosis
       - smoothness
       - skewness
+      - row_means
       - weighted_mean
-      - rowmean_n
       - mean_sd
 
   - title: Convert and Replace Data