Skip to content

Commit

Permalink
Flexible ROPE values for describe_posterior (#675)
Browse files Browse the repository at this point in the history
* Flexible ROPE values for describe_posterior
Fixes #643

* fix p_significance

* fix print for p_significance

* docs

* lintr

* print methods

* add test

* Update test-p_significance.R

* news, tests

* equi_test

* fix test

* fix

* update

* DRY

* fix typo

* fix

* allow named vectors

* fix

* Update test-p_significance.R

* fix test warning

* lintr

* revert
  • Loading branch information
strengejacke authored Sep 17, 2024
1 parent cde9716 commit a40c975
Show file tree
Hide file tree
Showing 24 changed files with 644 additions and 147 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Type: Package
Package: bayestestR
Title: Understand and Describe Bayesian Models and Posterior Distributions
Version: 0.14.0.8
Version: 0.14.0.9
Authors@R:
c(person(given = "Dominique",
family = "Makowski",
Expand Down
10 changes: 7 additions & 3 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,18 @@

## Changes

* Support for `posterior::rvar`-type column in data frames.
For example, a data frame `df` with an `rvar` column `".pred"` can now be
* Support for `posterior::rvar`-type column in data frames.
For example, a data frame `df` with an `rvar` column `".pred"` can now be
called directly via `p_direction(df, rvar_col = ".pred")`.

* Added support for `{marginaleffects}`

* The ROPE or threshold ranges in `rope()`, `describe_posterior()`, `p_significance()`
and `equivalence_test()` can now be specified as a list. This allows for different
ranges for different parameters.

* Results from objects generated by `{emmeans}` (`emmGrid`/`emm_list`) now
return results with appended grid-data.
return results with appended grid-data.

* Usability improvements for `p_direction()`:

Expand Down
8 changes: 5 additions & 3 deletions R/describe_posterior.R
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,10 @@
#' For each "test", the corresponding \pkg{bayestestR} function is called
#' (e.g. [bayestestR::rope()] or [bayestestR::p_direction()]) and its results
#' included in the summary output.
#' @param rope_range ROPE's lower and higher bounds. Should be a list of two
#' values (e.g., `c(-0.1, 0.1)`) or `"default"`. If `"default"`,
#' the bounds are set to `x +- 0.1*SD(response)`.
#' @param rope_range ROPE's lower and higher bounds. Should be a vector of two
#' values (e.g., `c(-0.1, 0.1)`), `"default"` or a list of numeric vectors of
#' the same length as numbers of parameters. If `"default"`, the bounds are
#' set to `x +- 0.1*SD(response)`.
#' @param rope_ci The Credible Interval (CI) probability, corresponding to the
#' proportion of HDI, to use for the percentage in ROPE.
#' @param keep_iterations If `TRUE`, will keep all iterations (draws) of
Expand Down Expand Up @@ -90,6 +91,7 @@
#' describe_posterior(model)
#' describe_posterior(model, centrality = "all", dispersion = TRUE, test = "all")
#' describe_posterior(model, ci = c(0.80, 0.90))
#' describe_posterior(model, rope_range = list(c(-10, 5), c(-0.2, 0.2), "default"))
#'
#' # emmeans estimates
#' # -----------------------------------------------
Expand Down
59 changes: 33 additions & 26 deletions R/equivalence_test.R
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,10 @@
#' \donttest{
#' model <- rstanarm::stan_glm(mpg ~ wt + cyl, data = mtcars)
#' equivalence_test(model)
#' # multiple ROPE ranges - asymmetric, symmetric, default
#' equivalence_test(model, range = list(c(10, 40), c(-5, -4), "default"))
#' # named ROPE ranges
#' equivalence_test(model, range = list(wt = c(-5, -4), `(Intercept)` = c(10, 40)))
#'
#' # plot result
#' test <- equivalence_test(model)
Expand Down Expand Up @@ -163,13 +167,33 @@ equivalence_test.data.frame <- function(x, range = "default", ci = 0.95, rvar_co
return(.append_datagrid(out, x))
}

l <- insight::compact_list(lapply(
x,
equivalence_test,
range = range,
ci = ci,
verbose = verbose
))
# multiple ranges for the parameters - iterate over parameters and range
if (is.list(range)) {
# check if list of values contains only valid values
range <- .check_list_range(range, x)
# apply thresholds to each column
l <- insight::compact_list(mapply(
function(p, r) {
equivalence_test(
p,
range = r,
ci = ci,
verbose = verbose
)
},
x,
range,
SIMPLIFY = FALSE
))
} else {
l <- insight::compact_list(lapply(
x,
equivalence_test,
range = range,
ci = ci,
verbose = verbose
))
}

dat <- do.call(rbind, l)
out <- data.frame(
Expand Down Expand Up @@ -244,7 +268,7 @@ equivalence_test.BFBayesFactor <- function(x, range = "default", ci = 0.95, verb
verbose = TRUE) {
if (all(range == "default")) {
range <- rope_range(x, verbose = verbose)
} else if (!all(is.numeric(range)) || length(range) != 2L) {
} else if (!is.list(range) && (!all(is.numeric(range)) || length(range) != 2L)) {
insight::format_error("`range` should be 'default' or a vector of 2 numeric values (e.g., c(-0.1, 0.1)).")
}

Expand All @@ -257,24 +281,7 @@ equivalence_test.BFBayesFactor <- function(x, range = "default", ci = 0.95, verb
verbose = verbose
)

l <- sapply(
params,
equivalence_test,
range = range,
ci = ci,
verbose = verbose,
simplify = FALSE
)

dat <- do.call(rbind, l)
out <- data.frame(
Parameter = rep(names(l), each = nrow(dat) / length(l)),
dat,
stringsAsFactors = FALSE
)

class(out) <- unique(c("equivalence_test", "see_equivalence_test", class(out)))
out
equivalence_test(params, range = range, ci = ci, verbose = verbose)
}


Expand Down
6 changes: 3 additions & 3 deletions R/eti.R
Original file line number Diff line number Diff line change
Expand Up @@ -316,8 +316,8 @@ eti.get_predicted <- function(x, ci = 0.95, use_iterations = FALSE, verbose = TR
))

data.frame(
"CI" = ci,
"CI_low" = results[1],
"CI_high" = results[2]
CI = ci,
CI_low = results[1],
CI_high = results[2]
)
}
2 changes: 1 addition & 1 deletion R/p_rope.R
Original file line number Diff line number Diff line change
Expand Up @@ -229,7 +229,7 @@ p_rope.mcmc.list <- p_rope.mcmc
#' @keywords internal
.p_rope <- function(rope_rez) {
cols <- c("Parameter", "ROPE_low", "ROPE_high", "ROPE_Percentage", "Effects", "Component")
out <- as.data.frame(rope_rez[cols[cols %in% names(rope_rez)]])
out <- as.data.frame(rope_rez)[cols[cols %in% names(rope_rez)]]
names(out)[names(out) == "ROPE_Percentage"] <- "p_ROPE"

class(out) <- c("p_rope", "see_p_rope", "data.frame")
Expand Down
126 changes: 113 additions & 13 deletions R/p_significance.R
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,11 @@
#' (i.e. the threshold range is set to -0.1 and 0.1, i.e. reflects a symmetric
#' interval)
#' - a numeric vector of length two (e.g., `c(-0.2, 0.1)`), useful for
#' asymmetric intervals.
#' asymmetric intervals
#' - a list of numeric vectors, where each vector corresponds to a parameter
#' - a list of *named* numeric vectors, where names correspond to parameter
#' names. In this case, all parameters that have no matching name in `threshold`
#' will be set to `"default"`.
#' @inheritParams rope
#' @inheritParams hdi
#'
Expand Down Expand Up @@ -53,6 +57,10 @@
#' chains = 2, refresh = 0
#' )
#' p_significance(model)
#' # multiple thresholds - asymmetric, symmetric, default
#' p_significance(model, threshold = list(c(-10, 5), 0.2, "default"))
#' # named thresholds
#' p_significance(model, threshold = list(wt = 0.2, `(Intercept)` = c(-10, 5)))
#' }
#' @export
p_significance <- function(x, ...) {
Expand Down Expand Up @@ -128,11 +136,26 @@ p_significance.data.frame <- function(x, threshold = "default", rvar_col = NULL,
}


threshold <- .select_threshold_ps(threshold = threshold)
threshold <- .select_threshold_ps(threshold = threshold, params = x)
x <- .select_nums(x)

if (ncol(x) == 1) {
ps <- .p_significance(x[, 1], threshold = threshold, ...)
} else if (is.list(threshold)) {
# check if list of values contains only valid values
threshold <- .check_list_range(threshold, x, larger_two = TRUE)
# apply thresholds to each column
ps <- mapply(
function(p, thres) {
.p_significance(
p,
threshold = thres
)
},
x,
threshold,
SIMPLIFY = FALSE
)
} else {
ps <- sapply(x, .p_significance, threshold = threshold, simplify = TRUE, ...)
}
Expand Down Expand Up @@ -268,12 +291,15 @@ p_significance.stanreg <- function(x,
...) {
effects <- match.arg(effects)
component <- match.arg(component)
threshold <- .select_threshold_ps(model = x, threshold = threshold, verbose = verbose)
params <- insight::get_parameters(x, effects = effects, component = component, parameters = parameters)

result <- p_significance(
insight::get_parameters(x, effects = effects, component = component, parameters = parameters),
threshold = threshold
threshold <- .select_threshold_ps(
model = x,
threshold = threshold,
params = params,
verbose = verbose
)
result <- p_significance(params, threshold = threshold)

cleaned_parameters <- insight::clean_parameters(x)
out <- .prepare_output(result, cleaned_parameters, inherits(x, "stanmvreg"))
Expand Down Expand Up @@ -304,12 +330,15 @@ p_significance.brmsfit <- function(x,
...) {
effects <- match.arg(effects)
component <- match.arg(component)
threshold <- .select_threshold_ps(model = x, threshold = threshold, verbose = verbose)
params <- insight::get_parameters(x, effects = effects, component = component, parameters = parameters)

result <- p_significance(
insight::get_parameters(x, effects = effects, component = component, parameters = parameters),
threshold = threshold
threshold <- .select_threshold_ps(
model = x,
threshold = threshold,
params = params,
verbose = verbose
)
result <- p_significance(params, threshold = threshold)

cleaned_parameters <- insight::clean_parameters(x)
out <- .prepare_output(result, cleaned_parameters)
Expand Down Expand Up @@ -364,21 +393,92 @@ as.double.p_significance <- as.numeric.p_significance
# helpers --------------------------

#' @keywords internal
.select_threshold_ps <- function(model = NULL, threshold = "default", verbose = TRUE) {
.select_threshold_ps <- function(model = NULL, threshold = "default", params = NULL, verbose = TRUE) {
if (is.list(threshold)) {
# if we have named elements, complete list
if (!is.null(params)) {
named_threshold <- names(threshold)
if (!is.null(named_threshold)) {
# find out which name belongs to which parameter
pos <- match(named_threshold, colnames(params))
# if not all element names were found, error
if (anyNA(pos)) {
insight::format_error(paste(
"Not all elements of `threshold` were found in the parameters. Please check following names:",
toString(named_threshold[is.na(pos)])
))
}
# now "fill" non-specified elements with "default"
out <- as.list(rep("default", ncol(params)))
out[pos] <- threshold
# overwrite former threshold
threshold <- out
}
}
lapply(threshold, function(i) {
out <- .select_threshold_list(model = model, threshold = i, verbose = verbose)
if (length(out) == 1) {
out <- c(-1 * abs(out), abs(out))
}
out
})
} else {
.select_threshold_list(model = model, threshold = threshold, verbose = verbose)
}
}

#' @keywords internal
.select_threshold_list <- function(model = NULL, threshold = "default", verbose = TRUE) {
# If default
if (all(threshold == "default")) {
if (is.null(model)) {
threshold <- 0.1
} else {
threshold <- rope_range(model, verbose = verbose)[2]
}
} else if (!all(is.numeric(threshold)) || length(threshold) > 2) {
} else if (!is.list(threshold) && (!all(is.numeric(threshold)) || length(threshold) > 2)) {
insight::format_error(
"`threshold` should be one of the following values:",
"- \"default\", in which case the threshold is based on `rope_range()`",
"- a single numeric value (e.g., 0.1), which is used as range around zero (i.e. the threshold range is set to -0.1 and 0.1)",
"- a single numeric value (e.g., 0.1), which is used as range around zero (i.e. the threshold range is set to -0.1 and 0.1)", # nolint
"- a numeric vector of length two (e.g., `c(-0.2, 0.1)`)"
)
}
threshold
}

.check_list_range <- function(range, params, larger_two = FALSE) {
# if we have named elements, complete list
named_range <- names(range)
if (!is.null(named_range)) {
# find out which name belongs to which parameter
pos <- match(named_range, colnames(params))
# if not all element names were found, error
if (anyNA(pos)) {
insight::format_error(paste(
"Not all elements of `range` were found in the parameters. Please check following names:",
toString(named_range[is.na(pos)])
))
}
# now "fill" non-specified elements with "default"
out <- as.list(rep("default", ncol(params)))
out[pos] <- range
# overwrite former range
range <- out
}
if (length(range) != ncol(params)) {
insight::format_error("Length of `range` (i.e. number of ROPE limits) should match the number of parameters.")
}
# check if list of values contains only valid values
checks <- vapply(range, function(r) {
if (larger_two) {
!all(r == "default") || !all(is.numeric(r)) || length(r) > 2
} else {
!all(r == "default") || !all(is.numeric(r)) || length(r) != 2
}
}, logical(1))
if (!all(checks)) {
insight::format_error("`range` should be 'default' or a vector of 2 numeric values (e.g., c(-0.1, 0.1)).")
}
range
}
Loading

0 comments on commit a40c975

Please sign in to comment.