Flexible ROPE values for describe_posterior (#675)

* Flexible ROPE values for describe_posterior Fixes #643 * fix p_significance * fix print for p_significance * docs * lintr * print methods * add test * Update test-p_significance.R * news, tests * equi_test * fix test * fix * update * DRY * fix typo * fix * allow named vectors * fix * Update test-p_significance.R * fix test warning * lintr * revert
easystats · Sep 17, 2024 · a40c975 · a40c975
1 parent cde9716
commit a40c975
Show file tree

Hide file tree

Showing 24 changed files with 644 additions and 147 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,7 +1,7 @@
 Type: Package
 Package: bayestestR
 Title: Understand and Describe Bayesian Models and Posterior Distributions
-Version: 0.14.0.8
+Version: 0.14.0.9
 Authors@R:
     c(person(given = "Dominique",
              family = "Makowski",

diff --git a/NEWS.md b/NEWS.md
@@ -2,14 +2,18 @@
 
 ## Changes
 
-* Support for `posterior::rvar`-type column in data frames. 
-  For example, a data frame `df` with an `rvar` column `".pred"` can now be 
+* Support for `posterior::rvar`-type column in data frames.
+  For example, a data frame `df` with an `rvar` column `".pred"` can now be
   called directly via `p_direction(df, rvar_col = ".pred")`.
 
 * Added support for `{marginaleffects}`
 
+* The ROPE or threshold ranges in `rope()`, `describe_posterior()`, `p_significance()`
+  and `equivalence_test()` can now be specified as a list. This allows for different
+  ranges for different parameters.
+
 * Results from objects generated by `{emmeans}` (`emmGrid`/`emm_list`) now
-  return results with appended grid-data. 
+  return results with appended grid-data.
 
 * Usability improvements for `p_direction()`:
 

diff --git a/R/describe_posterior.R b/R/describe_posterior.R
@@ -18,9 +18,10 @@
 #'   For each "test", the corresponding \pkg{bayestestR} function is called
 #'   (e.g. [bayestestR::rope()] or [bayestestR::p_direction()]) and its results
 #'   included in the summary output.
-#' @param rope_range ROPE's lower and higher bounds. Should be a list of two
-#'   values (e.g., `c(-0.1, 0.1)`) or `"default"`. If `"default"`,
-#'   the bounds are set to `x +- 0.1*SD(response)`.
+#' @param rope_range ROPE's lower and higher bounds. Should be a vector of two
+#'   values (e.g., `c(-0.1, 0.1)`), `"default"` or a list of numeric vectors of
+#'   the same length as numbers of parameters. If `"default"`, the bounds are
+#'   set to `x +- 0.1*SD(response)`.
 #' @param rope_ci The Credible Interval (CI) probability, corresponding to the
 #'   proportion of HDI, to use for the percentage in ROPE.
 #' @param keep_iterations If `TRUE`, will keep all iterations (draws) of
@@ -90,6 +91,7 @@
 #'   describe_posterior(model)
 #'   describe_posterior(model, centrality = "all", dispersion = TRUE, test = "all")
 #'   describe_posterior(model, ci = c(0.80, 0.90))
+#'   describe_posterior(model, rope_range = list(c(-10, 5), c(-0.2, 0.2), "default"))
 #'
 #'   # emmeans estimates
 #'   # -----------------------------------------------

diff --git a/R/equivalence_test.R b/R/equivalence_test.R
@@ -85,6 +85,10 @@
 #' \donttest{
 #' model <- rstanarm::stan_glm(mpg ~ wt + cyl, data = mtcars)
 #' equivalence_test(model)
+#' # multiple ROPE ranges - asymmetric, symmetric, default
+#' equivalence_test(model, range = list(c(10, 40), c(-5, -4), "default"))
+#' # named ROPE ranges
+#' equivalence_test(model, range = list(wt = c(-5, -4), `(Intercept)` = c(10, 40)))
 #'
 #' # plot result
 #' test <- equivalence_test(model)
@@ -163,13 +167,33 @@ equivalence_test.data.frame <- function(x, range = "default", ci = 0.95, rvar_co
     return(.append_datagrid(out, x))
   }
 
-  l <- insight::compact_list(lapply(
-    x,
-    equivalence_test,
-    range = range,
-    ci = ci,
-    verbose = verbose
-  ))
+  # multiple ranges for the parameters - iterate over parameters and range
+  if (is.list(range)) {
+    # check if list of values contains only valid values
+    range <- .check_list_range(range, x)
+    # apply thresholds to each column
+    l <- insight::compact_list(mapply(
+      function(p, r) {
+        equivalence_test(
+          p,
+          range = r,
+          ci = ci,
+          verbose = verbose
+        )
+      },
+      x,
+      range,
+      SIMPLIFY = FALSE
+    ))
+  } else {
+    l <- insight::compact_list(lapply(
+      x,
+      equivalence_test,
+      range = range,
+      ci = ci,
+      verbose = verbose
+    ))
+  }
 
   dat <- do.call(rbind, l)
   out <- data.frame(
@@ -244,7 +268,7 @@ equivalence_test.BFBayesFactor <- function(x, range = "default", ci = 0.95, verb
                                      verbose = TRUE) {
   if (all(range == "default")) {
     range <- rope_range(x, verbose = verbose)
-  } else if (!all(is.numeric(range)) || length(range) != 2L) {
+  } else if (!is.list(range) && (!all(is.numeric(range)) || length(range) != 2L)) {
     insight::format_error("`range` should be 'default' or a vector of 2 numeric values (e.g., c(-0.1, 0.1)).")
   }
 
@@ -257,24 +281,7 @@ equivalence_test.BFBayesFactor <- function(x, range = "default", ci = 0.95, verb
     verbose = verbose
   )
 
-  l <- sapply(
-    params,
-    equivalence_test,
-    range = range,
-    ci = ci,
-    verbose = verbose,
-    simplify = FALSE
-  )
-
-  dat <- do.call(rbind, l)
-  out <- data.frame(
-    Parameter = rep(names(l), each = nrow(dat) / length(l)),
-    dat,
-    stringsAsFactors = FALSE
-  )
-
-  class(out) <- unique(c("equivalence_test", "see_equivalence_test", class(out)))
-  out
+  equivalence_test(params, range = range, ci = ci, verbose = verbose)
 }
 
 

diff --git a/R/eti.R b/R/eti.R
@@ -316,8 +316,8 @@ eti.get_predicted <- function(x, ci = 0.95, use_iterations = FALSE, verbose = TR
   ))
 
   data.frame(
-    "CI" = ci,
-    "CI_low" = results[1],
-    "CI_high" = results[2]
+    CI = ci,
+    CI_low = results[1],
+    CI_high = results[2]
   )
 }
diff --git a/R/p_rope.R b/R/p_rope.R
@@ -229,7 +229,7 @@ p_rope.mcmc.list <- p_rope.mcmc
 #' @keywords internal
 .p_rope <- function(rope_rez) {
   cols <- c("Parameter", "ROPE_low", "ROPE_high", "ROPE_Percentage", "Effects", "Component")
-  out <- as.data.frame(rope_rez[cols[cols %in% names(rope_rez)]])
+  out <- as.data.frame(rope_rez)[cols[cols %in% names(rope_rez)]]
   names(out)[names(out) == "ROPE_Percentage"] <- "p_ROPE"
 
   class(out) <- c("p_rope", "see_p_rope", "data.frame")

diff --git a/R/p_significance.R b/R/p_significance.R
@@ -15,7 +15,11 @@
 #'   (i.e. the threshold range is set to -0.1 and 0.1, i.e. reflects a symmetric
 #'   interval)
 #' - a numeric vector of length two (e.g., `c(-0.2, 0.1)`), useful for
-#'   asymmetric intervals.
+#'   asymmetric intervals
+#' - a list of numeric vectors, where each vector corresponds to a parameter
+#' - a list of *named* numeric vectors, where names correspond to parameter
+#'   names. In this case, all parameters that have no matching name in `threshold`
+#'   will be set to `"default"`.
 #' @inheritParams rope
 #' @inheritParams hdi
 #'
@@ -53,6 +57,10 @@
 #'   chains = 2, refresh = 0
 #' )
 #' p_significance(model)
+#' # multiple thresholds - asymmetric, symmetric, default
+#' p_significance(model, threshold = list(c(-10, 5), 0.2, "default"))
+#' # named thresholds
+#' p_significance(model, threshold = list(wt = 0.2, `(Intercept)` = c(-10, 5)))
 #' }
 #' @export
 p_significance <- function(x, ...) {
@@ -128,11 +136,26 @@ p_significance.data.frame <- function(x, threshold = "default", rvar_col = NULL,
   }
 
 
-  threshold <- .select_threshold_ps(threshold = threshold)
+  threshold <- .select_threshold_ps(threshold = threshold, params = x)
   x <- .select_nums(x)
 
   if (ncol(x) == 1) {
     ps <- .p_significance(x[, 1], threshold = threshold, ...)
+  } else if (is.list(threshold)) {
+    # check if list of values contains only valid values
+    threshold <- .check_list_range(threshold, x, larger_two = TRUE)
+    # apply thresholds to each column
+    ps <- mapply(
+      function(p, thres) {
+        .p_significance(
+          p,
+          threshold = thres
+        )
+      },
+      x,
+      threshold,
+      SIMPLIFY = FALSE
+    )
   } else {
     ps <- sapply(x, .p_significance, threshold = threshold, simplify = TRUE, ...)
   }
@@ -268,12 +291,15 @@ p_significance.stanreg <- function(x,
                                    ...) {
   effects <- match.arg(effects)
   component <- match.arg(component)
-  threshold <- .select_threshold_ps(model = x, threshold = threshold, verbose = verbose)
+  params <- insight::get_parameters(x, effects = effects, component = component, parameters = parameters)
 
-  result <- p_significance(
-    insight::get_parameters(x, effects = effects, component = component, parameters = parameters),
-    threshold = threshold
+  threshold <- .select_threshold_ps(
+    model = x,
+    threshold = threshold,
+    params = params,
+    verbose = verbose
   )
+  result <- p_significance(params, threshold = threshold)
 
   cleaned_parameters <- insight::clean_parameters(x)
   out <- .prepare_output(result, cleaned_parameters, inherits(x, "stanmvreg"))
@@ -304,12 +330,15 @@ p_significance.brmsfit <- function(x,
                                    ...) {
   effects <- match.arg(effects)
   component <- match.arg(component)
-  threshold <- .select_threshold_ps(model = x, threshold = threshold, verbose = verbose)
+  params <- insight::get_parameters(x, effects = effects, component = component, parameters = parameters)
 
-  result <- p_significance(
-    insight::get_parameters(x, effects = effects, component = component, parameters = parameters),
-    threshold = threshold
+  threshold <- .select_threshold_ps(
+    model = x,
+    threshold = threshold,
+    params = params,
+    verbose = verbose
   )
+  result <- p_significance(params, threshold = threshold)
 
   cleaned_parameters <- insight::clean_parameters(x)
   out <- .prepare_output(result, cleaned_parameters)
@@ -364,21 +393,92 @@ as.double.p_significance <- as.numeric.p_significance
 # helpers --------------------------
 
 #' @keywords internal
-.select_threshold_ps <- function(model = NULL, threshold = "default", verbose = TRUE) {
+.select_threshold_ps <- function(model = NULL, threshold = "default", params = NULL, verbose = TRUE) {
+  if (is.list(threshold)) {
+    # if we have named elements, complete list
+    if (!is.null(params)) {
+      named_threshold <- names(threshold)
+      if (!is.null(named_threshold)) {
+        # find out which name belongs to which parameter
+        pos <- match(named_threshold, colnames(params))
+        # if not all element names were found, error
+        if (anyNA(pos)) {
+          insight::format_error(paste(
+            "Not all elements of `threshold` were found in the parameters. Please check following names:",
+            toString(named_threshold[is.na(pos)])
+          ))
+        }
+        # now "fill" non-specified elements with "default"
+        out <- as.list(rep("default", ncol(params)))
+        out[pos] <- threshold
+        # overwrite former threshold
+        threshold <- out
+      }
+    }
+    lapply(threshold, function(i) {
+      out <- .select_threshold_list(model = model, threshold = i, verbose = verbose)
+      if (length(out) == 1) {
+        out <- c(-1 * abs(out), abs(out))
+      }
+      out
+    })
+  } else {
+    .select_threshold_list(model = model, threshold = threshold, verbose = verbose)
+  }
+}
+
+#' @keywords internal
+.select_threshold_list <- function(model = NULL, threshold = "default", verbose = TRUE) {
   # If default
   if (all(threshold == "default")) {
     if (is.null(model)) {
       threshold <- 0.1
     } else {
       threshold <- rope_range(model, verbose = verbose)[2]
     }
-  } else if (!all(is.numeric(threshold)) || length(threshold) > 2) {
+  } else if (!is.list(threshold) && (!all(is.numeric(threshold)) || length(threshold) > 2)) {
     insight::format_error(
       "`threshold` should be one of the following values:",
       "- \"default\", in which case the threshold is based on `rope_range()`",
-      "- a single numeric value (e.g., 0.1), which is used as range around zero (i.e. the threshold range is set to -0.1 and 0.1)",
+      "- a single numeric value (e.g., 0.1), which is used as range around zero (i.e. the threshold range is set to -0.1 and 0.1)", # nolint
       "- a numeric vector of length two (e.g., `c(-0.2, 0.1)`)"
     )
   }
   threshold
 }
+
+.check_list_range <- function(range, params, larger_two = FALSE) {
+  # if we have named elements, complete list
+  named_range <- names(range)
+  if (!is.null(named_range)) {
+    # find out which name belongs to which parameter
+    pos <- match(named_range, colnames(params))
+    # if not all element names were found, error
+    if (anyNA(pos)) {
+      insight::format_error(paste(
+        "Not all elements of `range` were found in the parameters. Please check following names:",
+        toString(named_range[is.na(pos)])
+      ))
+    }
+    # now "fill" non-specified elements with "default"
+    out <- as.list(rep("default", ncol(params)))
+    out[pos] <- range
+    # overwrite former range
+    range <- out
+  }
+  if (length(range) != ncol(params)) {
+    insight::format_error("Length of `range` (i.e. number of ROPE limits) should match the number of parameters.")
+  }
+  # check if list of values contains only valid values
+  checks <- vapply(range, function(r) {
+    if (larger_two) {
+      !all(r == "default") || !all(is.numeric(r)) || length(r) > 2
+    } else {
+      !all(r == "default") || !all(is.numeric(r)) || length(r) != 2
+    }
+  }, logical(1))
+  if (!all(checks)) {
+    insight::format_error("`range` should be 'default' or a vector of 2 numeric values (e.g., c(-0.1, 0.1)).")
+  }
+  range
+}