From 00662534ff7ff2624ec7a636597803e75be1accd Mon Sep 17 00:00:00 2001 From: Daniel Date: Mon, 3 Jul 2023 14:35:05 +0200 Subject: [PATCH] Code style (#615) * code style * examples * test * examples * examples * examples, lintr * rd file * styler * try emmeans dev * rd cleanup * fix * test cov * tests * examples * fix test, example * examples * rd * examples * examples * examples * exa * minor --- DESCRIPTION | 1 + R/bayesfactor_models.R | 18 +- R/bayesfactor_parameters.R | 4 +- R/bayesfactor_restricted.R | 4 +- R/ci.R | 60 +++--- R/describe_posterior.R | 2 +- R/equivalence_test.R | 4 +- R/estimate_density.R | 4 +- R/eti.R | 2 +- R/hdi.R | 33 +-- R/mcse.R | 68 ++++--- R/mediation.R | 46 ++--- R/overlap.R | 2 +- R/p_map.R | 4 +- R/print.bayesfactor_models.R | 6 +- R/rope.R | 190 +++++++++--------- R/rope_range.R | 8 +- R/sexit.R | 4 +- R/sexit_thresholds.R | 8 +- R/si.R | 44 ++-- R/simulate_priors.R | 8 +- R/simulate_simpson.R | 6 +- R/spi.R | 4 +- R/unupdate.R | 5 +- R/weighted_posteriors.R | 45 +++-- man/bayesfactor_models.Rd | 12 +- man/bayesfactor_parameters.Rd | 4 +- man/bayesfactor_restricted.Rd | 4 +- man/bci.Rd | 25 ++- man/ci.Rd | 22 +- man/cwi.Rd | 3 +- man/describe_posterior.Rd | 4 +- man/equivalence_test.Rd | 20 +- man/estimate_density.Rd | 4 +- man/eti.Rd | 27 +-- man/hdi.Rd | 31 +-- man/mediation.Rd | 17 +- man/p_map.Rd | 4 +- man/p_rope.Rd | 16 +- man/p_significance.Rd | 3 +- man/rope.Rd | 109 ++++++---- man/rope_range.Rd | 8 +- man/sexit.Rd | 4 +- man/sexit_thresholds.Rd | 11 +- man/si.Rd | 45 +++-- man/simulate_prior.Rd | 4 +- man/simulate_simpson.Rd | 6 +- man/spi.Rd | 7 +- man/unupdate.Rd | 3 +- man/weighted_posteriors.Rd | 43 ++-- tests/testthat/test-bayesian_as_frequentist.R | 29 +++ tests/testthat/test-blavaan.R | 48 ++--- tests/testthat/test-ci.R | 23 ++- tests/testthat/test-overlap.R | 2 + 54 files changed, 612 insertions(+), 506 deletions(-) create mode 100644 tests/testthat/test-bayesian_as_frequentist.R diff --git a/DESCRIPTION b/DESCRIPTION index e0d072773..650acd770 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -125,3 +125,4 @@ Config/Needs/website: rstudio/bslib, r-lib/pkgdown, easystats/easystatstemplate +Remotes: rvlenth/emmeans diff --git a/R/bayesfactor_models.R b/R/bayesfactor_models.R index 168cab5a3..0ef678d79 100644 --- a/R/bayesfactor_models.R +++ b/R/bayesfactor_models.R @@ -9,13 +9,13 @@ #' @param ... Fitted models (see details), all fit on the same data, or a single #' `BFBayesFactor` object (see 'Details'). Ignored in `as.matrix()`, #' `update()`. If the following named arguments are present, they are passed -#' to [insight::get_loglikelihood] (see details): +#' to [`insight::get_loglikelihood()`] (see details): #' - `estimator` (defaults to `"ML"`) #' - `check_response` (defaults to `FALSE`) #' @param denominator Either an integer indicating which of the models to use as #' the denominator, or a model to be used as a denominator. Ignored for #' `BFBayesFactor`. -#' @param object,x A [bayesfactor_models()] object. +#' @param object,x A [`bayesfactor_models()`] object. #' @param subset Vector of model indices to keep or remove. #' @param reference Index of model to reference to, or `"top"` to #' reference to the best model, or `"bottom"` to reference to the worst @@ -26,9 +26,9 @@ #' implemented in the \href{https://easystats.github.io/see/}{\pkg{see}-package}. #' #' @details -#' If the passed models are supported by \pkg{insight} the DV of all models will be tested for equality -#' (else this is assumed to be true), and the models' terms will be extracted (allowing for follow-up -#' analysis with `bayesfactor_inclusion`). +#' If the passed models are supported by **insight** the DV of all models will +#' be tested for equality (else this is assumed to be true), and the models' +#' terms will be extracted (allowing for follow-up analysis with `bayesfactor_inclusion`). #' #' - For `brmsfit` or `stanreg` models, Bayes factors are computed using the \CRANpkg{bridgesampling} package. #' - `brmsfit` models must have been fitted with `save_pars = save_pars(all = TRUE)`. @@ -44,9 +44,9 @@ #' testing is substantially larger than for estimation (the default of 4000 #' samples may not be enough in many cases). A conservative rule of thumb is to #' obtain 10 times more samples than would be required for estimation -#' (\cite{Gronau, Singmann, & Wagenmakers, 2017}). If less than 40,000 samples +#' (_Gronau, Singmann, & Wagenmakers, 2017_). If less than 40,000 samples #' are detected, `bayesfactor_models()` gives a warning. -#' \cr \cr +#' #' See also [the Bayes factors vignette](https://easystats.github.io/bayestestR/articles/bayes_factors.html). #' #' @inheritSection bayesfactor_parameters Interpreting Bayes Factors @@ -220,11 +220,11 @@ bayesfactor_models.default <- function(..., denominator = 1, verbose = TRUE) { any(vapply(mods, insight::is_mixed_model, TRUE)) && !isTRUE(attr(objects, "same_fixef")) && verbose) { - insight::format_warning( + insight::format_warning(paste0( "Information criteria (like BIC) based on REML fits (i.e. `estimator=\"REML\"`)", "are not recommended for comparison between models with different fixed effects.", "Concider setting `estimator=\"ML\"`." - ) + )) } } else if (verbose) { insight::format_alert("Unable to validate that all models were fit with the same data.") diff --git a/R/bayesfactor_parameters.R b/R/bayesfactor_parameters.R index c8889ef1c..1251c83d5 100644 --- a/R/bayesfactor_parameters.R +++ b/R/bayesfactor_parameters.R @@ -132,9 +132,7 @@ #' #' # emmGrid objects #' # --------------- -#' group_diff <- suppressWarnings( -#' pairs(emmeans(stan_model, ~group, data = sleep)) -#' ) +#' group_diff <- pairs(emmeans(stan_model, ~group, data = sleep)) #' bayesfactor_parameters(group_diff, prior = stan_model, verbose = FALSE) #' #' # Or diff --git a/R/bayesfactor_restricted.R b/R/bayesfactor_restricted.R index 37108f70c..77ae4fc7e 100644 --- a/R/bayesfactor_restricted.R +++ b/R/bayesfactor_restricted.R @@ -94,9 +94,7 @@ #' contrasts(disgust$condition) <- contr.equalprior_pairs # see vignette #' fit_model <- rstanarm::stan_glm(score ~ condition, data = disgust, family = gaussian()) #' -#' em_condition <- suppressWarnings( -#' emmeans::emmeans(fit_model, ~condition, data = disgust) -#' ) +#' em_condition <- emmeans::emmeans(fit_model, ~condition, data = disgust) #' hyps <- c("lemon < control & control < sulfur") #' #' bayesfactor_restricted(em_condition, prior = fit_model, hypothesis = hyps) diff --git a/R/ci.R b/R/ci.R index ca5941da9..cdc5d9eb9 100644 --- a/R/ci.R +++ b/R/ci.R @@ -4,36 +4,37 @@ #' (SI) for Bayesian and frequentist models. The Documentation is accessible #' for: #' -#' \itemize{ -#' \item [Bayesian models](https://easystats.github.io/bayestestR/articles/credible_interval.html) -#' \item [Frequentist models](https://easystats.github.io/parameters/reference/ci.default.html) -#' } +#' - [Bayesian models](https://easystats.github.io/bayestestR/articles/credible_interval.html) +#' - [Frequentist models](https://easystats.github.io/parameters/reference/ci.default.html) #' -#' @param x A `stanreg` or `brmsfit` model, or a vector representing a posterior distribution. -#' @param method Can be ['ETI'][eti] (default), ['HDI'][hdi], ['BCI'][bci], ['SPI'][spi] or ['SI'][si]. +#' @param x A `stanreg` or `brmsfit` model, or a vector representing a posterior +#' distribution. +#' @param method Can be ["ETI"][eti] (default), ["HDI"][hdi], ["BCI"][bci], +#' ["SPI"][spi] or ["SI"][si]. #' @param ci Value or vector of probability of the CI (between 0 and 1) -#' to be estimated. Default to `.95` (`95%`). +#' to be estimated. Default to `0.95` (`95%`). #' @inheritParams hdi #' @inheritParams si #' @inherit hdi seealso #' @family ci #' #' @return A data frame with following columns: -#' \itemize{ -#' \item `Parameter` The model parameter(s), if `x` is a model-object. If `x` is a vector, this column is missing. -#' \item `CI` The probability of the credible interval. -#' \item `CI_low`, `CI_high` The lower and upper credible interval limits for the parameters. -#' } +#' +#' - `Parameter` The model parameter(s), if `x` is a model-object. If `x` is a +#' vector, this column is missing. +#' - `CI` The probability of the credible interval. +#' - `CI_low`, `CI_high` The lower and upper credible interval limits for the parameters. #' #' @note When it comes to interpretation, we recommend thinking of the CI in terms of #' an "uncertainty" or "compatibility" interval, the latter being defined as -#' \dQuote{Given any value in the interval and the background assumptions, -#' the data should not seem very surprising} (\cite{Gelman & Greenland 2019}). -#' \cr \cr -#' There is also a [`plot()`-method](https://easystats.github.io/see/articles/bayestestR.html) implemented in the \href{https://easystats.github.io/see/}{\pkg{see}-package}. +#' "Given any value in the interval and the background assumptions, +#' the data should not seem very surprising" (_Gelman & Greenland 2019_). #' -#' @references Gelman A, Greenland S. Are confidence intervals better termed "uncertainty intervals"? BMJ 2019;l5381. 10.1136/bmj.l5381 +#' There is also a [`plot()`-method](https://easystats.github.io/see/articles/bayestestR.html) implemented in the \href{https://easystats.github.io/see/}{\pkg{see}-package}. #' +#' @references +#' Gelman A, Greenland S. Are confidence intervals better termed "uncertainty +#' intervals"? BMJ 2019;l5381. 10.1136/bmj.l5381 #' #' @examplesIf require("rstanarm", quietly = TRUE) #' library(bayestestR) @@ -58,7 +59,7 @@ #' ci(bf, method = "HDI") #' #' @examplesIf require("emmeans", quietly = TRUE) && require("rstanarm", quietly = TRUE) -#' model <- suppressWarnings(emtrends(model, ~1, "wt", data = mtcars)) +#' model <- emtrends(model, ~1, "wt", data = mtcars) #' ci(model, method = "ETI") #' ci(model, method = "HDI") #' @export @@ -138,9 +139,12 @@ ci <- function(x, ...) { ) ) } else { - insight::format_error( - "`method` should be 'ETI' (for equal-tailed interval),'HDI' (for highest density interval), 'BCI' (for bias corrected and accelerated bootstrap intervals), 'SPI' (for shortest probability interval) or 'SI' (for support interval)." - ) + insight::format_error(paste0( + "`method` should be 'ETI' (for equal-tailed interval), ", + "'HDI' (for highest density interval), 'BCI' (for bias corrected and ", + "accelerated bootstrap intervals), 'SPI' (for shortest probability ", + "interval) or 'SI' (for support interval)." + )) } } @@ -157,7 +161,6 @@ ci.numeric <- function(x, ci = 0.95, method = "ETI", verbose = TRUE, BF = 1, ... ci.data.frame <- ci.numeric - #' @export ci.draws <- function(x, ci = 0.95, method = "ETI", verbose = TRUE, BF = 1, ...) { .ci_bayesian(.posterior_draws_to_df(x), ci = ci, method = method, verbose = verbose, BF = BF, ...) @@ -185,7 +188,6 @@ ci.emmGrid <- function(x, ci = NULL, ...) { ci.emm_list <- ci.emmGrid - #' @rdname ci #' @export ci.sim.merMod <- function(x, @@ -207,7 +209,6 @@ ci.sim.merMod <- function(x, } - #' @rdname ci #' @export ci.sim <- function(x, @@ -227,7 +228,6 @@ ci.sim <- function(x, } - #' @rdname ci #' @export ci.stanreg <- function(x, @@ -285,22 +285,17 @@ ci.brmsfit <- function(x, ) } - #' @export ci.stanfit <- ci.stanreg - #' @export ci.blavaan <- ci.stanreg - #' @rdname ci #' @export ci.BFBayesFactor <- ci.numeric - - #' @rdname ci #' @export ci.MCMCglmm <- function(x, ci = 0.95, method = "ETI", verbose = TRUE, ...) { @@ -338,22 +333,17 @@ ci.bcplm <- function(x, ci = 0.95, method = "ETI", verbose = TRUE, ...) { ci(insight::get_parameters(x), ci = ci, method = method, verbose = verbose, ...) } - #' @export ci.blrm <- ci.bcplm - #' @export ci.mcmc <- ci.bcplm - #' @export ci.mcmc.list <- ci.bcplm - #' @export ci.BGGM <- ci.bcplm - #' @export ci.get_predicted <- ci.data.frame diff --git a/R/describe_posterior.R b/R/describe_posterior.R index e62b055ad..b292cfaee 100644 --- a/R/describe_posterior.R +++ b/R/describe_posterior.R @@ -92,7 +92,7 @@ #' #' # emmeans estimates #' # ----------------------------------------------- -#' describe_posterior(suppressWarnings(emtrends(model, ~1, "wt"))) +#' describe_posterior(emtrends(model, ~1, "wt")) #' } #' #' # BayesFactor objects diff --git a/R/equivalence_test.R b/R/equivalence_test.R index 1703e8f88..a0da152f6 100644 --- a/R/equivalence_test.R +++ b/R/equivalence_test.R @@ -89,9 +89,7 @@ #' plot(test) #' #' library(emmeans) -#' equivalence_test( -#' suppressWarnings(emtrends(model, ~1, "wt", data = mtcars)) -#' ) +#' equivalence_test(emtrends(model, ~1, "wt", data = mtcars)) #' #' library(brms) #' model <- brms::brm(mpg ~ wt + cyl, data = mtcars) diff --git a/R/estimate_density.R b/R/estimate_density.R index f65184749..554290adf 100644 --- a/R/estimate_density.R +++ b/R/estimate_density.R @@ -82,9 +82,7 @@ #' head(estimate_density(model)) #' #' library(emmeans) -#' head(estimate_density(suppressWarnings( -#' emtrends(model, ~1, "wt", data = mtcars) -#' ))) +#' head(estimate_density(emtrends(model, ~1, "wt", data = mtcars))) #' #' # brms models #' # ----------------------------------------------- diff --git a/R/eti.R b/R/eti.R index 9ec8cb12e..04d47498d 100644 --- a/R/eti.R +++ b/R/eti.R @@ -31,7 +31,7 @@ #' eti(model, ci = c(0.80, 0.89, 0.95)) #' #' library(emmeans) -#' eti(suppressWarnings(emtrends(model, ~1, "wt", data = mtcars))) +#' eti(emtrends(model, ~1, "wt", data = mtcars)) #' #' library(brms) #' model <- brms::brm(mpg ~ wt + cyl, data = mtcars) diff --git a/R/hdi.R b/R/hdi.R index 8563b389b..0b365de90 100644 --- a/R/hdi.R +++ b/R/hdi.R @@ -31,22 +31,24 @@ #' from each tail of the distribution and always include the median, the HDI is #' *not* equal-tailed and therefore always includes the mode(s) of posterior #' distributions. While this can be useful to better represent the credibility -#' mass of a distribution, the HDI also has some limitations. See [spi()] for +#' mass of a distribution, the HDI also has some limitations. See [`spi()`] for #' details. -#' \cr \cr +#' #' The [`95%` or `89%` Credible Intervals (CI)](https://easystats.github.io/bayestestR/articles/credible_interval.html) -#' are two reasonable ranges to characterize the uncertainty related to the estimation (see [here](https://easystats.github.io/bayestestR/articles/credible_interval.html) for a discussion about the differences between these two values). -#' \cr +#' are two reasonable ranges to characterize the uncertainty related to the +#' estimation (see [here](https://easystats.github.io/bayestestR/articles/credible_interval.html) +#' for a discussion about the differences between these two values). +#' #' The `89%` intervals (`ci = 0.89`) are deemed to be more stable than, for -#' instance, `95%` intervals (\cite{Kruschke, 2014}). An effective sample size +#' instance, `95%` intervals (_Kruschke, 2014_). An effective sample size #' of at least 10.000 is recommended if one wants to estimate `95%` intervals -#' with high precision (\cite{Kruschke, 2014, p. 183ff}). Unfortunately, the +#' with high precision (_Kruschke, 2014, p. 183ff_). Unfortunately, the #' default number of posterior samples for most Bayes packages (e.g., `rstanarm` #' or `brms`) is only 4.000 (thus, you might want to increase it when fitting #' your model). Moreover, 89 indicates the arbitrariness of interval limits - #' its only remarkable property is being the highest prime number that does not -#' exceed the already unstable `95%` threshold (\cite{McElreath, 2015}). -#' \cr +#' exceed the already unstable `95%` threshold (_McElreath, 2015_). +#' #' However, `95%` has some [advantages #' too](https://easystats.github.io/blog/posts/bayestestr_95/). For instance, it #' shares (in the case of a normal posterior distribution) an intuitive @@ -55,17 +57,17 @@ #' makes analyses more conservative (i.e., the probability of covering 0 is #' larger for the `95%` CI than for lower ranges such as `89%`), which is a good #' thing in the context of the reproducibility crisis. -#' \cr \cr +#' #' A `95%` equal-tailed interval (ETI) has `2.5%` of the distribution on either #' side of its limits. It indicates the 2.5th percentile and the 97.5h #' percentile. In symmetric distributions, the two methods of computing credible #' intervals, the ETI and the [HDI][hdi], return similar results. -#' \cr \cr +#' #' This is not the case for skewed distributions. Indeed, it is possible that #' parameter values in the ETI have lower credibility (are less probable) than #' parameter values outside the ETI. This property seems undesirable as a summary #' of the credible values in a distribution. -#' \cr \cr +#' #' On the other hand, the ETI range does change when transformations are applied #' to the distribution (for instance, for a log odds scale to probabilities): #' the lower and higher bounds of the transformed distribution will correspond @@ -110,10 +112,11 @@ #' } #' @author Credits go to **ggdistribute** and [**HDInterval**](https://github.com/mikemeredith/HDInterval). #' -#' @references \itemize{ -#' \item Kruschke, J. (2014). Doing Bayesian data analysis: A tutorial with R, JAGS, and Stan. Academic Press. -#' \item McElreath, R. (2015). Statistical rethinking: A Bayesian course with examples in R and Stan. Chapman and Hall/CRC. -#' } +#' @references +#' - Kruschke, J. (2014). Doing Bayesian data analysis: A tutorial with R, JAGS, +#' and Stan. Academic Press. +#' - McElreath, R. (2015). Statistical rethinking: A Bayesian course with +#' examples in R and Stan. Chapman and Hall/CRC. #' #' @export hdi <- function(x, ...) { diff --git a/R/mcse.R b/R/mcse.R index e30ff75a9..8862673f1 100644 --- a/R/mcse.R +++ b/R/mcse.R @@ -30,26 +30,28 @@ mcse <- function(model, ...) { #' @export -mcse.brmsfit <- function(model, effects = c("fixed", "random", "all"), component = c("conditional", "zi", "zero_inflated", "all"), parameters = NULL, ...) { +mcse.brmsfit <- function(model, + effects = c("fixed", "random", "all"), + component = c("conditional", "zi", "zero_inflated", "all"), + parameters = NULL, + ...) { # check arguments effects <- match.arg(effects) component <- match.arg(component) - params <- - insight::get_parameters( - model, - effects = effects, - component = component, - parameters = parameters - ) - - ess <- - effective_sample( - model, - effects = effects, - component = component, - parameters = parameters - ) + params <- insight::get_parameters( + model, + effects = effects, + component = component, + parameters = parameters + ) + + ess <- effective_sample( + model, + effects = effects, + component = component, + parameters = parameters + ) .mcse(params, stats::setNames(ess$ESS, ess$Parameter)) } @@ -57,26 +59,28 @@ mcse.brmsfit <- function(model, effects = c("fixed", "random", "all"), component #' @rdname mcse #' @export -mcse.stanreg <- function(model, effects = c("fixed", "random", "all"), component = c("location", "all", "conditional", "smooth_terms", "sigma", "distributional", "auxiliary"), parameters = NULL, ...) { +mcse.stanreg <- function(model, + effects = c("fixed", "random", "all"), + component = c("location", "all", "conditional", "smooth_terms", "sigma", "distributional", "auxiliary"), + parameters = NULL, + ...) { # check arguments effects <- match.arg(effects) component <- match.arg(component) - params <- - insight::get_parameters( - model, - effects = effects, - component = component, - parameters = parameters - ) - - ess <- - effective_sample( - model, - effects = effects, - component = component, - parameters = parameters - ) + params <- insight::get_parameters( + model, + effects = effects, + component = component, + parameters = parameters + ) + + ess <- effective_sample( + model, + effects = effects, + component = component, + parameters = parameters + ) .mcse(params, stats::setNames(ess$ESS, ess$Parameter)) } diff --git a/R/mediation.R b/R/mediation.R index 2f3dcd9eb..e9e551d16 100644 --- a/R/mediation.R +++ b/R/mediation.R @@ -35,27 +35,27 @@ #' samples (use `centrality` for other centrality indices). #' #' @details `mediation()` returns a data frame with information on the -#' *direct effect* (mean value of posterior samples from `treatment` -#' of the outcome model), *mediator effect* (mean value of posterior -#' samples from `mediator` of the outcome model), *indirect effect* -#' (mean value of the multiplication of the posterior samples from -#' `mediator` of the outcome model and the posterior samples from -#' `treatment` of the mediation model) and the total effect (mean -#' value of sums of posterior samples used for the direct and indirect -#' effect). The *proportion mediated* is the indirect effect divided -#' by the total effect. -#' \cr \cr -#' For all values, the `89%` credible intervals are calculated by default. -#' Use `ci` to calculate a different interval. -#' \cr \cr -#' The arguments `treatment` and `mediator` do not necessarily -#' need to be specified. If missing, `mediation()` tries to find the -#' treatment and mediator variable automatically. If this does not work, -#' specify these variables. -#' \cr \cr -#' The direct effect is also called *average direct effect* (ADE), -#' the indirect effect is also called *average causal mediation effects* -#' (ACME). See also \cite{Tingley et al. 2014} and \cite{Imai et al. 2010}. +#' *direct effect* (mean value of posterior samples from `treatment` +#' of the outcome model), *mediator effect* (mean value of posterior +#' samples from `mediator` of the outcome model), *indirect effect* +#' (mean value of the multiplication of the posterior samples from +#' `mediator` of the outcome model and the posterior samples from +#' `treatment` of the mediation model) and the total effect (mean +#' value of sums of posterior samples used for the direct and indirect +#' effect). The *proportion mediated* is the indirect effect divided +#' by the total effect. +#' +#' For all values, the `89%` credible intervals are calculated by default. +#' Use `ci` to calculate a different interval. +#' +#' The arguments `treatment` and `mediator` do not necessarily +#' need to be specified. If missing, `mediation()` tries to find the +#' treatment and mediator variable automatically. If this does not work, +#' specify these variables. +#' +#' The direct effect is also called *average direct effect* (ADE), +#' the indirect effect is also called *average causal mediation effects* +#' (ACME). See also _Tingley et al. 2014_ and _Imai et al. 2010_. #' #' @note There is an `as.data.frame()` method that returns the posterior #' samples of the effects, which can be used for further processing in the @@ -97,7 +97,7 @@ #' m2 <- brm(f1 + f2 + set_rescor(FALSE), data = jobs, cores = 4, refresh = 0) #' #' # Fit Bayesian mediation model in rstanarm -#' m3 <- stan_mvmer( +#' m3 <- suppressWarnings(stan_mvmer( #' list( #' job_seek ~ treat + econ_hard + sex + age + (1 | occp), #' depress2 ~ treat + job_seek + econ_hard + sex + age + (1 | occp) @@ -105,7 +105,7 @@ #' data = jobs, #' cores = 4, #' refresh = 0 -#' ) +#' )) #' #' summary(m1) #' mediation(m2, centrality = "mean", ci = 0.95) diff --git a/R/overlap.R b/R/overlap.R index 710c8a3be..36faf6899 100644 --- a/R/overlap.R +++ b/R/overlap.R @@ -52,7 +52,7 @@ overlap <- function(x, y, method_density = "kernel", method_auc = "trapezoid", p #' @export print.overlap <- function(x, ...) { insight::print_color("# Overlap\n\n", "blue") - cat(sprintf("%.2f", as.numeric(x))) + cat(sprintf("%.1f%%\n", 100 * as.numeric(x))) } diff --git a/R/p_map.R b/R/p_map.R index 78b72d528..0e6bc7485 100644 --- a/R/p_map.R +++ b/R/p_map.R @@ -30,7 +30,9 @@ #' p_map(model) #' #' library(emmeans) -#' p_map(emtrends(model, ~1, "wt", data = mtcars)) +#' p_map(suppressWarnings( +#' emtrends(model, ~1, "wt", data = mtcars) +#' )) #' #' library(brms) #' model <- brms::brm(mpg ~ wt + cyl, data = mtcars) diff --git a/R/print.bayesfactor_models.R b/R/print.bayesfactor_models.R index 54482ac83..551b0ff97 100644 --- a/R/print.bayesfactor_models.R +++ b/R/print.bayesfactor_models.R @@ -16,7 +16,7 @@ print.bayesfactor_models_matrix <- function(x, digits = 2, log = FALSE, exact = models <- colnames(df) models[models == "1"] <- "(Intercept only)" models <- paste0("[", seq_along(models), "] ", models) - k <- max(sapply(c(models, "Denominator"), nchar)) + 2 + k <- max(vapply(c(models, "Denominator"), nchar, numeric(1))) + 2 rownames(df) <- colnames(df) <- NULL df <- cbind(Model = models, df) @@ -25,10 +25,10 @@ print.bayesfactor_models_matrix <- function(x, digits = 2, log = FALSE, exact = out <- insight::export_table( df, caption = c("# Bayes Factors for Model Comparison", "blue"), - subtitle = c(sprintf("\n\n%sNumerator\nDenominator", paste(rep(" ", k), collapse = "")), "cyan"), + subtitle = c(sprintf("\n\n%sNumerator\nDenominator", strrep(" ", k)), "cyan"), footer = if (log) c("\nBayes Factors are on the log-scale.\n", "red") ) - out <- sub("placeholder", "\b\b", out) + out <- sub("placeholder", "\b\b", out, fixed = TRUE) cat(out) invisible(orig_x) diff --git a/R/rope.R b/R/rope.R index 6072febd5..a282019f4 100644 --- a/R/rope.R +++ b/R/rope.R @@ -1,87 +1,99 @@ #' Region of Practical Equivalence (ROPE) #' -#' Compute the proportion of the HDI (default to the `89%` HDI) of a posterior distribution that lies within a region of practical equivalence. +#' Compute the proportion of the HDI (default to the `89%` HDI) of a posterior +#' distribution that lies within a region of practical equivalence. #' -#' @param x Vector representing a posterior distribution. Can also be a `stanreg` or `brmsfit` model. +#' @param x Vector representing a posterior distribution. Can also be a +#' `stanreg` or `brmsfit` model. #' @param range ROPE's lower and higher bounds. Should be `"default"` or -#' depending on the number of outcome variables a vector or a list. In models with one response, -#' `range` should be a vector of length two (e.g., `c(-0.1, 0.1)`). In -#' multivariate models, `range` should be a list with a numeric vectors for -#' each response variable. Vector names should correspond to the name of the response -#' variables. If `"default"` and input is a vector, the range is set to `c(-0.1, -#' 0.1)`. If `"default"` and input is a Bayesian model, -#' [`rope_range()`][rope_range] is used. +#' depending on the number of outcome variables a vector or a list. In +#' models with one response, `range` should be a vector of length two (e.g., +#' `c(-0.1, 0.1)`). In multivariate models, `range` should be a list with a +#' numeric vectors for each response variable. Vector names should correspond +#' to the name of the response variables. If `"default"` and input is a vector, +#' the range is set to `c(-0.1, 0.1)`. If `"default"` and input is a Bayesian +#' model, [`rope_range()`][rope_range] is used. #' @param ci The Credible Interval (CI) probability, corresponding to the -#' proportion of HDI, to use for the percentage in ROPE. +#' proportion of HDI, to use for the percentage in ROPE. #' @param ci_method The type of interval to use to quantify the percentage in -#' ROPE. Can be 'HDI' (default) or 'ETI'. See [ci()]. +#' ROPE. Can be 'HDI' (default) or 'ETI'. See [ci()]. #' #' @inheritParams hdi #' -#' @details -#' \subsection{ROPE}{ -#' Statistically, the probability of a posterior distribution of being -#' different from 0 does not make much sense (the probability of a single value -#' null hypothesis in a continuous distribution is 0). Therefore, the idea -#' underlining ROPE is to let the user define an area around the null value -#' enclosing values that are *equivalent to the null* value for practical -#' purposes (\cite{Kruschke 2010, 2011, 2014}). -#' \cr \cr -#' Kruschke (2018) suggests that such null value could be set, by default, -#' to the -0.1 to 0.1 range of a standardized parameter (negligible effect -#' size according to Cohen, 1988). This could be generalized: For instance, -#' for linear models, the ROPE could be set as `0 +/- .1 * sd(y)`. -#' This ROPE range can be automatically computed for models using the -#' [rope_range] function. -#' \cr \cr -#' Kruschke (2010, 2011, 2014) suggests using the proportion of the `95%` -#' (or `89%`, considered more stable) [HDI][hdi] that falls within the -#' ROPE as an index for "null-hypothesis" testing (as understood under the -#' Bayesian framework, see [`equivalence_test()`][equivalence_test]). -#' } -#' \subsection{Sensitivity to parameter's scale}{ -#' It is important to consider the unit (i.e., the scale) of the predictors -#' when using an index based on the ROPE, as the correct interpretation of the -#' ROPE as representing a region of practical equivalence to zero is dependent -#' on the scale of the predictors. Indeed, the percentage in ROPE depend on -#' the unit of its parameter. In other words, as the ROPE represents a fixed -#' portion of the response's scale, its proximity with a coefficient depends -#' on the scale of the coefficient itself. -#' } -#' \subsection{Multicollinearity: Non-independent covariates}{ -#' When parameters show strong correlations, i.e. when covariates are not -#' independent, the joint parameter distributions may shift towards or -#' away from the ROPE. Collinearity invalidates ROPE and hypothesis -#' testing based on univariate marginals, as the probabilities are conditional -#' on independence. Most problematic are parameters that only have partial -#' overlap with the ROPE region. In case of collinearity, the (joint) distributions -#' of these parameters may either get an increased or decreased ROPE, which -#' means that inferences based on `rope()` are inappropriate -#' (\cite{Kruschke 2014, 340f}). -#' \cr \cr -#' `rope()` performs a simple check for pairwise correlations between -#' parameters, but as there can be collinearity between more than two variables, -#' a first step to check the assumptions of this hypothesis testing is to look -#' at different pair plots. An even more sophisticated check is the projection -#' predictive variable selection (\cite{Piironen and Vehtari 2017}). -#' } -#' \subsection{Strengths and Limitations}{ -#' **Strengths:** Provides information related to the practical relevance of the effects. -#' \cr \cr -#' **Limitations:** A ROPE range needs to be arbitrarily defined. Sensitive to the scale (the unit) of the predictors. Not sensitive to highly significant effects. -#' } +#' @section ROPE: +#' Statistically, the probability of a posterior distribution of being +#' different from 0 does not make much sense (the probability of a single value +#' null hypothesis in a continuous distribution is 0). Therefore, the idea +#' underlining ROPE is to let the user define an area around the null value +#' enclosing values that are *equivalent to the null* value for practical +#' purposes (_Kruschke 2010, 2011, 2014_). +#' +#' Kruschke (2018) suggests that such null value could be set, by default, +#' to the -0.1 to 0.1 range of a standardized parameter (negligible effect +#' size according to Cohen, 1988). This could be generalized: For instance, +#' for linear models, the ROPE could be set as `0 +/- .1 * sd(y)`. +#' This ROPE range can be automatically computed for models using the +#' [rope_range] function. +#' +#' Kruschke (2010, 2011, 2014) suggests using the proportion of the `95%` +#' (or `89%`, considered more stable) [HDI][hdi] that falls within the +#' ROPE as an index for "null-hypothesis" testing (as understood under the +#' Bayesian framework, see [`equivalence_test()`][equivalence_test]). +#' +#' @section Sensitivity to parameter's scale: +#' It is important to consider the unit (i.e., the scale) of the predictors +#' when using an index based on the ROPE, as the correct interpretation of the +#' ROPE as representing a region of practical equivalence to zero is dependent +#' on the scale of the predictors. Indeed, the percentage in ROPE depend on +#' the unit of its parameter. In other words, as the ROPE represents a fixed +#' portion of the response's scale, its proximity with a coefficient depends +#' on the scale of the coefficient itself. +#' +#' @section Multicollinearity - Non-independent covariates: +#' When parameters show strong correlations, i.e. when covariates are not +#' independent, the joint parameter distributions may shift towards or +#' away from the ROPE. Collinearity invalidates ROPE and hypothesis +#' testing based on univariate marginals, as the probabilities are conditional +#' on independence. Most problematic are parameters that only have partial +#' overlap with the ROPE region. In case of collinearity, the (joint) distributions +#' of these parameters may either get an increased or decreased ROPE, which +#' means that inferences based on `rope()` are inappropriate +#' (_Kruschke 2014, 340f_). +#' +#' `rope()` performs a simple check for pairwise correlations between +#' parameters, but as there can be collinearity between more than two variables, +#' a first step to check the assumptions of this hypothesis testing is to look +#' at different pair plots. An even more sophisticated check is the projection +#' predictive variable selection (_Piironen and Vehtari 2017_). +#' +#' @section Strengths and Limitations: +#' **Strengths:** Provides information related to the practical relevance of +#' the effects. +#' +#' **Limitations:** A ROPE range needs to be arbitrarily defined. Sensitive to +#' the scale (the unit) of the predictors. Not sensitive to highly significant +#' effects. #' #' @note There is also a [`plot()`-method](https://easystats.github.io/see/articles/bayestestR.html) implemented in the \href{https://easystats.github.io/see/}{\pkg{see}-package}. #' -#' @references \itemize{ -#' \item Cohen, J. (1988). Statistical power analysis for the behavioural sciences. -#' \item Kruschke, J. K. (2010). What to believe: Bayesian methods for data analysis. Trends in cognitive sciences, 14(7), 293-300. \doi{10.1016/j.tics.2010.05.001}. -#' \item Kruschke, J. K. (2011). Bayesian assessment of null values via parameter estimation and model comparison. Perspectives on Psychological Science, 6(3), 299-312. \doi{10.1177/1745691611406925}. -#' \item Kruschke, J. K. (2014). Doing Bayesian data analysis: A tutorial with R, JAGS, and Stan. Academic Press. \doi{10.1177/2515245918771304}. -#' \item Kruschke, J. K. (2018). Rejecting or accepting parameter values in Bayesian estimation. Advances in Methods and Practices in Psychological Science, 1(2), 270-280. \doi{10.1177/2515245918771304}. -#' \item Makowski D, Ben-Shachar MS, Chen SHA, Lüdecke D (2019) Indices of Effect Existence and Significance in the Bayesian Framework. Frontiers in Psychology 2019;10:2767. \doi{10.3389/fpsyg.2019.02767} -#' \item Piironen, J., & Vehtari, A. (2017). Comparison of Bayesian predictive methods for model selection. Statistics and Computing, 27(3), 711–735. \doi{10.1007/s11222-016-9649-y} -#' } +#' @references +#' - Cohen, J. (1988). Statistical power analysis for the behavioural sciences. +#' - Kruschke, J. K. (2010). What to believe: Bayesian methods for data analysis. +#' Trends in cognitive sciences, 14(7), 293-300. \doi{10.1016/j.tics.2010.05.001}. +#' - Kruschke, J. K. (2011). Bayesian assessment of null values via parameter +#' estimation and model comparison. Perspectives on Psychological Science, +#' 6(3), 299-312. \doi{10.1177/1745691611406925}. +#' - Kruschke, J. K. (2014). Doing Bayesian data analysis: A tutorial with R, +#' JAGS, and Stan. Academic Press. \doi{10.1177/2515245918771304}. +#' - Kruschke, J. K. (2018). Rejecting or accepting parameter values in Bayesian +#' estimation. Advances in Methods and Practices in Psychological Science, +#' 1(2), 270-280. \doi{10.1177/2515245918771304}. +#' - Makowski D, Ben-Shachar MS, Chen SHA, Lüdecke D (2019) Indices of Effect +#' Existence and Significance in the Bayesian Framework. Frontiers in +#' Psychology 2019;10:2767. \doi{10.3389/fpsyg.2019.02767} +#' - Piironen, J., & Vehtari, A. (2017). Comparison of Bayesian predictive +#' methods for model selection. Statistics and Computing, 27(3), 711–735. +#' \doi{10.1007/s11222-016-9649-y} #' #' @examples #' library(bayestestR) @@ -89,30 +101,35 @@ #' rope(x = rnorm(1000, 0, 0.01), range = c(-0.1, 0.1)) #' rope(x = rnorm(1000, 0, 1), range = c(-0.1, 0.1)) #' rope(x = rnorm(1000, 1, 0.01), range = c(-0.1, 0.1)) -#' rope(x = rnorm(1000, 1, 1), ci = c(.90, .95)) +#' rope(x = rnorm(1000, 1, 1), ci = c(0.90, 0.95)) #' \dontrun{ #' library(rstanarm) -#' model <- stan_glm(mpg ~ wt + gear, data = mtcars, chains = 2, iter = 200, refresh = 0) +#' model <- suppressWarnings( +#' stan_glm(mpg ~ wt + gear, data = mtcars, chains = 2, iter = 200, refresh = 0) +#' ) #' rope(model) -#' rope(model, ci = c(.90, .95)) +#' rope(model, ci = c(0.90, 0.95)) #' #' library(emmeans) -#' rope(emtrends(model, ~1, "wt"), ci = c(.90, .95)) +#' rope(emtrends(model, ~1, "wt"), ci = c(0.90, 0.95)) #' #' library(brms) -#' model <- brms::brm(mpg ~ wt + cyl, data = mtcars) +#' model <- brm(mpg ~ wt + cyl, data = mtcars) #' rope(model) -#' rope(model, ci = c(.90, .95)) +#' rope(model, ci = c(0.90, 0.95)) #' #' library(brms) -#' model <- brms::brm(brms::mvbind(mpg, disp) ~ wt + cyl, data = mtcars) +#' model <- brm( +#' bf(mvbind(mpg, disp) ~ wt + cyl) + set_rescor(rescor = TRUE), +#' data = mtcars +#' ) #' rope(model) -#' rope(model, ci = c(.90, .95)) +#' rope(model, ci = c(0.90, 0.95)) #' #' library(BayesFactor) #' bf <- ttestBF(x = rnorm(100, 1, 1)) #' rope(bf) -#' rope(bf, ci = c(.90, .95)) +#' rope(bf, ci = c(0.90, 0.95)) #' } #' @export rope <- function(x, ...) { @@ -149,7 +166,6 @@ rope.numeric <- function(x, range = "default", ci = 0.95, ci_method = "ETI", ver # "do.call(rbind)" does not bind attribute values together # so we need to capture the information about HDI separately - out <- do.call(rbind, rope_values) if (nrow(out) > 1) { out$ROPE_Percentage <- as.numeric(out$ROPE_Percentage) @@ -188,7 +204,6 @@ rope.data.frame <- function(x, range = "default", ci = 0.95, ci_method = "ETI", } - #' @export rope.draws <- function(x, range = "default", ci = 0.95, ci_method = "ETI", verbose = TRUE, ...) { rope(.posterior_draws_to_df(x), range = range, ci = ci, ci_method = ci_method, verbose = verbose, ...) @@ -198,7 +213,6 @@ rope.draws <- function(x, range = "default", ci = 0.95, ci_method = "ETI", verbo rope.rvar <- rope.draws - #' @export rope.emmGrid <- function(x, range = "default", ci = 0.95, ci_method = "ETI", verbose = TRUE, ...) { xdf <- insight::get_parameters(x) @@ -212,7 +226,6 @@ rope.emmGrid <- function(x, range = "default", ci = 0.95, ci_method = "ETI", ver rope.emm_list <- rope.emmGrid - #' @export rope.BFBayesFactor <- function(x, range = "default", ci = 0.95, ci_method = "ETI", verbose = TRUE, ...) { if (all(range == "default")) { @@ -223,7 +236,6 @@ rope.BFBayesFactor <- function(x, range = "default", ci = 0.95, ci_method = "ETI out } - #' @export rope.bamlss <- rope.BFBayesFactor @@ -274,7 +286,6 @@ rope.BGGM <- rope.bcplm rope.mcmc.list <- rope.bcplm - #' @keywords internal .rope <- function(x, range = c(-0.1, 0.1), ci = 0.95, ci_method = "ETI", verbose = TRUE) { ci_bounds <- ci(x, ci = ci, method = ci_method, verbose = verbose) @@ -302,7 +313,6 @@ rope.mcmc.list <- rope.bcplm } - #' @rdname rope #' @export rope.stanreg <- function(x, range = "default", ci = 0.95, ci_method = "ETI", effects = c("fixed", "random", "all"), component = c("location", "all", "conditional", "smooth_terms", "sigma", "distributional", "auxiliary"), parameters = NULL, verbose = TRUE, ...) { @@ -431,7 +441,6 @@ rope.brmsfit <- function(x, } - #' @export rope.sim.merMod <- function(x, range = "default", @@ -501,7 +510,6 @@ rope.sim.merMod <- function(x, } - #' @export rope.sim <- function(x, range = "default", ci = 0.95, ci_method = "ETI", parameters = NULL, verbose = TRUE, ...) { if (all(range == "default")) { @@ -537,8 +545,6 @@ rope.sim <- function(x, range = "default", ci = 0.95, ci_method = "ETI", paramet } - - #' @keywords internal .prepare_rope_df <- function(parms, range, ci, ci_method, verbose) { tmp <- sapply( diff --git a/R/rope_range.R b/R/rope_range.R index 636105fcb..b4c7a99cb 100644 --- a/R/rope_range.R +++ b/R/rope_range.R @@ -44,16 +44,18 @@ #' @examples #' \dontrun{ #' if (require("rstanarm")) { -#' model <- stan_glm( +#' model <- suppressWarnings(stan_glm( #' mpg ~ wt + gear, #' data = mtcars, #' chains = 2, #' iter = 200, #' refresh = 0 -#' ) +#' )) #' rope_range(model) #' -#' model <- stan_glm(vs ~ mpg, data = mtcars, family = "binomial", refresh = 0) +#' model <- suppressWarnings( +#' stan_glm(vs ~ mpg, data = mtcars, family = "binomial", refresh = 0) +#' ) #' rope_range(model) #' } #' diff --git a/R/sexit.R b/R/sexit.R index 920cac3c0..114492592 100644 --- a/R/sexit.R +++ b/R/sexit.R @@ -114,10 +114,10 @@ #' print(s, summary = TRUE) #' #' if (require("rstanarm")) { -#' model <- rstanarm::stan_glm(mpg ~ wt * cyl, +#' model <- suppressWarnings(rstanarm::stan_glm(mpg ~ wt * cyl, #' data = mtcars, #' iter = 400, refresh = 0 -#' ) +#' )) #' s <- sexit(model) #' s #' print(s, summary = TRUE) diff --git a/R/sexit_thresholds.R b/R/sexit_thresholds.R index 539f7986a..b8eeadfc9 100644 --- a/R/sexit_thresholds.R +++ b/R/sexit_thresholds.R @@ -12,16 +12,18 @@ #' sexit_thresholds(rnorm(1000)) #' \dontrun{ #' if (require("rstanarm")) { -#' model <- stan_glm( +#' model <- suppressWarnings(stan_glm( #' mpg ~ wt + gear, #' data = mtcars, #' chains = 2, #' iter = 200, #' refresh = 0 -#' ) +#' )) #' sexit_thresholds(model) #' -#' model <- stan_glm(vs ~ mpg, data = mtcars, family = "binomial", refresh = 0) +#' model <- suppressWarnings( +#' stan_glm(vs ~ mpg, data = mtcars, family = "binomial", refresh = 0) +#' ) #' sexit_thresholds(model) #' } #' diff --git a/R/si.R b/R/si.R index 6d1e41e22..dd718352e 100644 --- a/R/si.R +++ b/R/si.R @@ -5,9 +5,9 @@ #' updating factor greater or equal than *k*. From the perspective of the Savage-Dickey Bayes factor, testing #' against a point null hypothesis for any value within the support interval will yield a Bayes factor smaller #' than *1/k*. -#' \cr \cr -#' \strong{For more info, in particular on specifying correct priors for factors with more than 2 levels, -#' see [the Bayes factors vignette](https://easystats.github.io/bayestestR/articles/bayes_factors.html).} +#' +#' **For more info, in particular on specifying correct priors for factors with more than 2 levels, +#' see [the Bayes factors vignette](https://easystats.github.io/bayestestR/articles/bayes_factors.html).** #' #' @param BF The amount of support required to be included in the support interval. #' @inheritParams bayesfactor_parameters @@ -20,16 +20,17 @@ #' they should be *not flat*, and it is preferable that they be *informative* - note #' that by default, `brms::brm()` uses flat priors for fixed-effects; see example below). #' -#' \subsection{Choosing a value of `BF`}{ -#' The choice of `BF` (the level of support) depends on what we want our interval to represent: -#' \itemize{ -#' \item A `BF` = 1 contains values whose credibility is not decreased by observing the data. -#' \item A `BF` > 1 contains values who received more impressive support from the data. -#' \item A `BF` < 1 contains values whose credibility has *not* been impressively decreased by observing the data. -#' Testing against values outside this interval will produce a Bayes factor larger than 1/`BF` in support of -#' the alternative. E.g., if an SI (BF = 1/3) excludes 0, the Bayes factor against the point-null will be larger than 3. -#' } -#' } +#' @section Choosing a value of `BF`: +#' The choice of `BF` (the level of support) depends on what we want our interval +#' to represent: +#' +#' - A `BF` = 1 contains values whose credibility is not decreased by observing the data. +#' - A `BF` > 1 contains values who received more impressive support from the data. +#' - A `BF` < 1 contains values whose credibility has *not* been impressively +#' decreased by observing the data. Testing against values outside this interval +#' will produce a Bayes factor larger than 1/`BF` in support of the alternative. +#' E.g., if an SI (BF = 1/3) excludes 0, the Bayes factor against the point-null +#' will be larger than 3. #' #' @inheritSection bayesfactor_parameters Setting the correct `prior` #' @@ -37,7 +38,7 @@ #' #' @return #' A data frame containing the lower and upper bounds of the SI. -#' \cr +#' #' Note that if the level of requested support is higher than observed in the data, the #' interval will be `[NA,NA]`. #' @@ -45,23 +46,23 @@ #' library(bayestestR) #' #' prior <- distribution_normal(1000, mean = 0, sd = 1) -#' posterior <- distribution_normal(1000, mean = .5, sd = .3) +#' posterior <- distribution_normal(1000, mean = 0.5, sd = 0.3) #' -#' si(posterior, prior) +#' si(posterior, prior, verbose = FALSE) #' \dontrun{ #' # rstanarm models #' # --------------- #' library(rstanarm) #' contrasts(sleep$group) <- contr.equalprior_pairs # see vignette #' stan_model <- stan_lmer(extra ~ group + (1 | ID), data = sleep) -#' si(stan_model) -#' si(stan_model, BF = 3) +#' si(stan_model, verbose = FALSE) +#' si(stan_model, BF = 3, verbose = FALSE) #' #' # emmGrid objects #' # --------------- #' library(emmeans) #' group_diff <- pairs(emmeans(stan_model, ~group)) -#' si(group_diff, prior = stan_model) +#' si(group_diff, prior = stan_model, verbose = FALSE) #' #' # brms models #' # ----------- @@ -76,10 +77,11 @@ #' prior = my_custom_priors, #' refresh = 0 #' )) -#' si(brms_model) +#' si(brms_model, verbose = FALSE) #' } #' @references -#' Wagenmakers, E., Gronau, Q. F., Dablander, F., & Etz, A. (2018, November 22). The Support Interval. \doi{10.31234/osf.io/zwnxb} +#' Wagenmakers, E., Gronau, Q. F., Dablander, F., & Etz, A. (2018, November 22). +#' The Support Interval. \doi{10.31234/osf.io/zwnxb} #' #' @export si <- function(posterior, prior = NULL, BF = 1, verbose = TRUE, ...) { diff --git a/R/simulate_priors.R b/R/simulate_priors.R index 1bb5780eb..da04fc8bf 100644 --- a/R/simulate_priors.R +++ b/R/simulate_priors.R @@ -5,14 +5,16 @@ #' @inheritParams effective_sample #' @param n Size of the simulated prior distributions. #' -#' @seealso [unupdate()] for directly sampling from the prior +#' @seealso [`unupdate()`] for directly sampling from the prior #' distribution (useful for complex priors and designs). #' #' @examples #' \dontrun{ #' library(bayestestR) #' if (require("rstanarm")) { -#' model <- stan_glm(mpg ~ wt + am, data = mtcars, chains = 1, refresh = 0) +#' model <- suppressWarnings( +#' stan_glm(mpg ~ wt + am, data = mtcars, chains = 1, refresh = 0) +#' ) #' simulate_prior(model) #' } #' } @@ -66,7 +68,7 @@ simulate_prior.brmsfit <- function(model, effects = effects, component = component, parameters = parameters, - verbose = verbose, + verbose = verbose ) .simulate_prior(priors, n = n, verbose = verbose) diff --git a/R/simulate_simpson.R b/R/simulate_simpson.R index 63861a4a5..fbf62a229 100644 --- a/R/simulate_simpson.R +++ b/R/simulate_simpson.R @@ -4,10 +4,8 @@ #' and statistics, in which a trend appears in several different groups of data #' but disappears or reverses when these groups are combined. #' -#' @param n The number of observations for each group to be generated (minimum -#' 4). -#' @param groups Number of groups (groups can be participants, clusters, -#' anything). +#' @param n The number of observations for each group to be generated (minimum 4). +#' @param groups Number of groups (groups can be participants, clusters, anything). #' @param difference Difference between groups. #' @param group_prefix The prefix of the group name (e.g., "G_1", "G_2", "G_3", ...). #' @inheritParams simulate_correlation diff --git a/R/spi.R b/R/spi.R index 34016e2f2..355a92d46 100644 --- a/R/spi.R +++ b/R/spi.R @@ -35,7 +35,9 @@ #' spi(df, ci = c(0.80, 0.89, 0.95)) #' \dontrun{ #' library(rstanarm) -#' model <- stan_glm(mpg ~ wt + gear, data = mtcars, chains = 2, iter = 200, refresh = 0) +#' model <- suppressWarnings( +#' stan_glm(mpg ~ wt + gear, data = mtcars, chains = 2, iter = 200, refresh = 0) +#' ) #' spi(model) #' } #' diff --git a/R/unupdate.R b/R/unupdate.R index 10ed3df7c..6a30541b8 100644 --- a/R/unupdate.R +++ b/R/unupdate.R @@ -4,7 +4,7 @@ #' the goal of this function is to un-update the posteriors to obtain models #' representing the priors. These models can then be used to examine the prior #' predictive distribution, or to compare priors with posteriors. -#' \cr\cr +#' #' This function in used internally to compute Bayes factors. #' #' @param model A fitted Bayesian model. @@ -117,8 +117,7 @@ unupdate.brmsfit_multiple <- function(model, if (methods::is(model_prior, "try-error")) { if (grepl("proper priors", model_prior, fixed = TRUE)) { insight::format_error( - "Cannot sample from flat priors (such as the default ", - "priors for fixed-effects in a 'brmsfit' model).", + "Cannot sample from flat priors (such as the default priors for fixed-effects in a 'brmsfit' model)." ) } else { insight::format_error(model_prior) diff --git a/R/weighted_posteriors.R b/R/weighted_posteriors.R index b2702cb67..d9f8a9c1b 100644 --- a/R/weighted_posteriors.R +++ b/R/weighted_posteriors.R @@ -22,12 +22,12 @@ #' subtracting for continuous variables, and effects coding via `contr.sum` or #' orthonormal coding via [`contr.equalprior_pairs`] for factors) can reduce this #' issue. In any case you should be mindful of this issue. -#' \cr\cr +#' #' See [bayesfactor_models()] details for more info on passed models. -#' \cr\cr +#' #' Note that for `BayesFactor` models, posterior samples cannot be generated #' from intercept only models. -#' \cr\cr +#' #' This function is similar in function to `brms::posterior_average`. #' #' @note For `BayesFactor < 0.9.12-4.3`, in some instances there might be @@ -36,26 +36,26 @@ #' #' @return A data frame with posterior distributions (weighted across models) . #' -#' @seealso [bayesfactor_inclusion()] for Bayesian model averaging. +#' @seealso [`bayesfactor_inclusion()`] for Bayesian model averaging. #' #' @examples #' \donttest{ -#' if (require("rstanarm") && require("see")) { -#' stan_m0 <- stan_glm(extra ~ 1, +#' if (require("rstanarm") && require("see") && interactive()) { +#' stan_m0 <- suppressWarnings(stan_glm(extra ~ 1, #' data = sleep, #' family = gaussian(), #' refresh = 0, #' diagnostic_file = file.path(tempdir(), "df0.csv") -#' ) +#' )) #' -#' stan_m1 <- stan_glm(extra ~ group, +#' stan_m1 <- suppressWarnings(stan_glm(extra ~ group, #' data = sleep, #' family = gaussian(), #' refresh = 0, #' diagnostic_file = file.path(tempdir(), "df1.csv") -#' ) +#' )) #' -#' res <- weighted_posteriors(stan_m0, stan_m1) +#' res <- weighted_posteriors(stan_m0, stan_m1, verbose = FALSE) #' #' plot(eti(res)) #' } @@ -64,39 +64,42 @@ #' if (require("BayesFactor")) { #' extra_sleep <- ttestBF(formula = extra ~ group, data = sleep) #' -#' wp <- weighted_posteriors(extra_sleep) +#' wp <- weighted_posteriors(extra_sleep, verbose = FALSE) #' -#' describe_posterior(extra_sleep, test = NULL) -#' describe_posterior(wp$delta, test = NULL) # also considers the null +#' describe_posterior(extra_sleep, test = NULL, verbose = FALSE) +#' # also considers the null +#' describe_posterior(wp$delta, test = NULL, verbose = FALSE) #' } #' #' #' ## weighted prediction distributions via data.frames -#' if (require("rstanarm")) { -#' m0 <- stan_glm( +#' if (require("rstanarm") && interactive()) { +#' m0 <- suppressWarnings(stan_glm( #' mpg ~ 1, #' data = mtcars, #' family = gaussian(), #' diagnostic_file = file.path(tempdir(), "df0.csv"), #' refresh = 0 -#' ) +#' )) #' -#' m1 <- stan_glm( +#' m1 <- suppressWarnings(stan_glm( #' mpg ~ carb, #' data = mtcars, #' family = gaussian(), #' diagnostic_file = file.path(tempdir(), "df1.csv"), #' refresh = 0 -#' ) +#' )) #' #' # Predictions: #' pred_m0 <- data.frame(posterior_predict(m0)) #' pred_m1 <- data.frame(posterior_predict(m1)) #' -#' BFmods <- bayesfactor_models(m0, m1) +#' BFmods <- bayesfactor_models(m0, m1, verbose = FALSE) #' -#' wp <- weighted_posteriors(pred_m0, pred_m1, -#' prior_odds = as.numeric(BFmods)[2] +#' wp <- weighted_posteriors( +#' pred_m0, pred_m1, +#' prior_odds = as.numeric(BFmods)[2], +#' verbose = FALSE #' ) #' #' # look at first 5 prediction intervals diff --git a/man/bayesfactor_models.Rd b/man/bayesfactor_models.Rd index 2a7b69060..89e83db95 100644 --- a/man/bayesfactor_models.Rd +++ b/man/bayesfactor_models.Rd @@ -22,7 +22,7 @@ bf_models(..., denominator = 1, verbose = TRUE) \item{...}{Fitted models (see details), all fit on the same data, or a single \code{BFBayesFactor} object (see 'Details'). Ignored in \code{as.matrix()}, \code{update()}. If the following named arguments are present, they are passed -to \link[insight:get_loglikelihood]{insight::get_loglikelihood} (see details): +to \code{\link[insight:get_loglikelihood]{insight::get_loglikelihood()}} (see details): \itemize{ \item \code{estimator} (defaults to \code{"ML"}) \item \code{check_response} (defaults to \code{FALSE}) @@ -53,9 +53,9 @@ This function computes or extracts Bayes factors from fitted models. The \verb{bf_*} function is an alias of the main function. } \details{ -If the passed models are supported by \pkg{insight} the DV of all models will be tested for equality -(else this is assumed to be true), and the models' terms will be extracted (allowing for follow-up -analysis with \code{bayesfactor_inclusion}). +If the passed models are supported by \strong{insight} the DV of all models will +be tested for equality (else this is assumed to be true), and the models' +terms will be extracted (allowing for follow-up analysis with \code{bayesfactor_inclusion}). \itemize{ \item For \code{brmsfit} or \code{stanreg} models, Bayes factors are computed using the \CRANpkg{bridgesampling} package. \itemize{ @@ -74,9 +74,9 @@ are the 4 P's: \strong{P}roper \strong{P}riors and \strong{P}lentiful testing is substantially larger than for estimation (the default of 4000 samples may not be enough in many cases). A conservative rule of thumb is to obtain 10 times more samples than would be required for estimation -(\cite{Gronau, Singmann, & Wagenmakers, 2017}). If less than 40,000 samples +(\emph{Gronau, Singmann, & Wagenmakers, 2017}). If less than 40,000 samples are detected, \code{bayesfactor_models()} gives a warning. -\cr \cr + See also \href{https://easystats.github.io/bayestestR/articles/bayes_factors.html}{the Bayes factors vignette}. } \note{ diff --git a/man/bayesfactor_parameters.Rd b/man/bayesfactor_parameters.Rd index b30815d5f..b5726ecbf 100644 --- a/man/bayesfactor_parameters.Rd +++ b/man/bayesfactor_parameters.Rd @@ -278,9 +278,7 @@ if (require("rstanarm") && require("emmeans") && require("logspline")) { # emmGrid objects # --------------- - group_diff <- suppressWarnings( - pairs(emmeans(stan_model, ~group, data = sleep)) - ) + group_diff <- pairs(emmeans(stan_model, ~group, data = sleep)) bayesfactor_parameters(group_diff, prior = stan_model, verbose = FALSE) # Or diff --git a/man/bayesfactor_restricted.Rd b/man/bayesfactor_restricted.Rd index 3bb3c87d0..1fe715e68 100644 --- a/man/bayesfactor_restricted.Rd +++ b/man/bayesfactor_restricted.Rd @@ -208,9 +208,7 @@ data("disgust") contrasts(disgust$condition) <- contr.equalprior_pairs # see vignette fit_model <- rstanarm::stan_glm(score ~ condition, data = disgust, family = gaussian()) -em_condition <- suppressWarnings( - emmeans::emmeans(fit_model, ~condition, data = disgust) -) +em_condition <- emmeans::emmeans(fit_model, ~condition, data = disgust) hyps <- c("lemon < control & control < sulfur") bayesfactor_restricted(em_condition, prior = fit_model, hypothesis = hyps) diff --git a/man/bci.Rd b/man/bci.Rd index 2bace9b0f..48ac91f10 100644 --- a/man/bci.Rd +++ b/man/bci.Rd @@ -90,7 +90,8 @@ be abbreviated. Only applies to \pkg{brms}-models.} \value{ A data frame with following columns: \itemize{ -\item \code{Parameter} The model parameter(s), if \code{x} is a model-object. If \code{x} is a vector, this column is missing. +\item \code{Parameter} The model parameter(s), if \code{x} is a model-object. If \code{x} is a +vector, this column is missing. \item \code{CI} The probability of the credible interval. \item \code{CI_low}, \code{CI_high} The lower and upper credible interval limits for the parameters. } @@ -106,20 +107,22 @@ from each tail of the distribution and always include the median, the HDI is distributions. While this can be useful to better represent the credibility mass of a distribution, the HDI also has some limitations. See \code{\link[=spi]{spi()}} for details. -\cr \cr + The \href{https://easystats.github.io/bayestestR/articles/credible_interval.html}{\verb{95\%} or \verb{89\%} Credible Intervals (CI)} -are two reasonable ranges to characterize the uncertainty related to the estimation (see \href{https://easystats.github.io/bayestestR/articles/credible_interval.html}{here} for a discussion about the differences between these two values). -\cr +are two reasonable ranges to characterize the uncertainty related to the +estimation (see \href{https://easystats.github.io/bayestestR/articles/credible_interval.html}{here} +for a discussion about the differences between these two values). + The \verb{89\%} intervals (\code{ci = 0.89}) are deemed to be more stable than, for -instance, \verb{95\%} intervals (\cite{Kruschke, 2014}). An effective sample size +instance, \verb{95\%} intervals (\emph{Kruschke, 2014}). An effective sample size of at least 10.000 is recommended if one wants to estimate \verb{95\%} intervals -with high precision (\cite{Kruschke, 2014, p. 183ff}). Unfortunately, the +with high precision (\emph{Kruschke, 2014, p. 183ff}). Unfortunately, the default number of posterior samples for most Bayes packages (e.g., \code{rstanarm} or \code{brms}) is only 4.000 (thus, you might want to increase it when fitting your model). Moreover, 89 indicates the arbitrariness of interval limits - its only remarkable property is being the highest prime number that does not -exceed the already unstable \verb{95\%} threshold (\cite{McElreath, 2015}). -\cr +exceed the already unstable \verb{95\%} threshold (\emph{McElreath, 2015}). + However, \verb{95\%} has some \href{https://easystats.github.io/blog/posts/bayestestr_95/}{advantages too}. For instance, it shares (in the case of a normal posterior distribution) an intuitive relationship with the standard deviation and it conveys a more accurate image @@ -127,17 +130,17 @@ of the (artificial) bounds of the distribution. Also, because it is wider, it makes analyses more conservative (i.e., the probability of covering 0 is larger for the \verb{95\%} CI than for lower ranges such as \verb{89\%}), which is a good thing in the context of the reproducibility crisis. -\cr \cr + A \verb{95\%} equal-tailed interval (ETI) has \verb{2.5\%} of the distribution on either side of its limits. It indicates the 2.5th percentile and the 97.5h percentile. In symmetric distributions, the two methods of computing credible intervals, the ETI and the \link[=hdi]{HDI}, return similar results. -\cr \cr + This is not the case for skewed distributions. Indeed, it is possible that parameter values in the ETI have lower credibility (are less probable) than parameter values outside the ETI. This property seems undesirable as a summary of the credible values in a distribution. -\cr \cr + On the other hand, the ETI range does change when transformations are applied to the distribution (for instance, for a log odds scale to probabilities): the lower and higher bounds of the transformed distribution will correspond diff --git a/man/ci.Rd b/man/ci.Rd index 6934cbff9..a80f688d8 100644 --- a/man/ci.Rd +++ b/man/ci.Rd @@ -60,14 +60,16 @@ ci(x, ...) \method{ci}{MCMCglmm}(x, ci = 0.95, method = "ETI", verbose = TRUE, ...) } \arguments{ -\item{x}{A \code{stanreg} or \code{brmsfit} model, or a vector representing a posterior distribution.} +\item{x}{A \code{stanreg} or \code{brmsfit} model, or a vector representing a posterior +distribution.} \item{...}{Currently not used.} \item{ci}{Value or vector of probability of the CI (between 0 and 1) -to be estimated. Default to \code{.95} (\verb{95\%}).} +to be estimated. Default to \code{0.95} (\verb{95\%}).} -\item{method}{Can be \link[=eti]{'ETI'} (default), \link[=hdi]{'HDI'}, \link[=bci]{'BCI'}, \link[=spi]{'SPI'} or \link[=si]{'SI'}.} +\item{method}{Can be \link[=eti]{"ETI"} (default), \link[=hdi]{"HDI"}, \link[=bci]{"BCI"}, +\link[=spi]{"SPI"} or \link[=si]{"SI"}.} \item{verbose}{Toggle off warnings.} @@ -89,7 +91,8 @@ be abbreviated. Only applies to \pkg{brms}-models.} \value{ A data frame with following columns: \itemize{ -\item \code{Parameter} The model parameter(s), if \code{x} is a model-object. If \code{x} is a vector, this column is missing. +\item \code{Parameter} The model parameter(s), if \code{x} is a model-object. If \code{x} is a +vector, this column is missing. \item \code{CI} The probability of the credible interval. \item \code{CI_low}, \code{CI_high} The lower and upper credible interval limits for the parameters. } @@ -108,9 +111,9 @@ for: \note{ When it comes to interpretation, we recommend thinking of the CI in terms of an "uncertainty" or "compatibility" interval, the latter being defined as -\dQuote{Given any value in the interval and the background assumptions, -the data should not seem very surprising} (\cite{Gelman & Greenland 2019}). -\cr \cr +"Given any value in the interval and the background assumptions, +the data should not seem very surprising" (\emph{Gelman & Greenland 2019}). + There is also a \href{https://easystats.github.io/see/articles/bayestestR.html}{\code{plot()}-method} implemented in the \href{https://easystats.github.io/see/}{\pkg{see}-package}. } \examples{ @@ -137,13 +140,14 @@ ci(bf, method = "ETI") ci(bf, method = "HDI") \dontshow{\}) # examplesIf} \dontshow{if (require("emmeans", quietly = TRUE) && require("rstanarm", quietly = TRUE)) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} -model <- suppressWarnings(emtrends(model, ~1, "wt", data = mtcars)) +model <- emtrends(model, ~1, "wt", data = mtcars) ci(model, method = "ETI") ci(model, method = "HDI") \dontshow{\}) # examplesIf} } \references{ -Gelman A, Greenland S. Are confidence intervals better termed "uncertainty intervals"? BMJ 2019;l5381. 10.1136/bmj.l5381 +Gelman A, Greenland S. Are confidence intervals better termed "uncertainty +intervals"? BMJ 2019;l5381. 10.1136/bmj.l5381 } \seealso{ Other ci: diff --git a/man/cwi.Rd b/man/cwi.Rd index 253ad9166..6b3db0ae0 100644 --- a/man/cwi.Rd +++ b/man/cwi.Rd @@ -24,7 +24,8 @@ resemble the arguments of the \code{.numeric} or \code{.data.frame}methods.} \value{ A data frame with following columns: \itemize{ -\item \code{Parameter} The model parameter(s), if \code{x} is a model-object. If \code{x} is a vector, this column is missing. +\item \code{Parameter} The model parameter(s), if \code{x} is a model-object. If \code{x} is a +vector, this column is missing. \item \code{CI} The probability of the credible interval. \item \code{CI_low}, \code{CI_high} The lower and upper credible interval limits for the parameters. } diff --git a/man/describe_posterior.Rd b/man/describe_posterior.Rd index 31015e25f..0f1131e34 100644 --- a/man/describe_posterior.Rd +++ b/man/describe_posterior.Rd @@ -84,7 +84,7 @@ to the estimate(s) (\code{SD} and \code{MAD} for \code{mean} and \code{median}, Dispersion is not available for \code{"MAP"} or \code{"mode"} centrality indices.} \item{ci}{Value or vector of probability of the CI (between 0 and 1) -to be estimated. Default to \code{.95} (\verb{95\%}).} +to be estimated. Default to \code{0.95} (\verb{95\%}).} \item{ci_method}{The type of index used for Credible Interval. Can be \code{"ETI"} (default, see \code{\link[=eti]{eti()}}), \code{"HDI"} @@ -187,7 +187,7 @@ if (require("rstanarm") && require("emmeans")) { # emmeans estimates # ----------------------------------------------- - describe_posterior(suppressWarnings(emtrends(model, ~1, "wt"))) + describe_posterior(emtrends(model, ~1, "wt")) } # BayesFactor objects diff --git a/man/equivalence_test.Rd b/man/equivalence_test.Rd index 9a2b2fb00..36aabd050 100644 --- a/man/equivalence_test.Rd +++ b/man/equivalence_test.Rd @@ -38,17 +38,19 @@ equivalence_test(x, ...) ) } \arguments{ -\item{x}{Vector representing a posterior distribution. Can also be a \code{stanreg} or \code{brmsfit} model.} +\item{x}{Vector representing a posterior distribution. Can also be a +\code{stanreg} or \code{brmsfit} model.} \item{...}{Currently not used.} \item{range}{ROPE's lower and higher bounds. Should be \code{"default"} or -depending on the number of outcome variables a vector or a list. In models with one response, -\code{range} should be a vector of length two (e.g., \code{c(-0.1, 0.1)}). In -multivariate models, \code{range} should be a list with a numeric vectors for -each response variable. Vector names should correspond to the name of the response -variables. If \code{"default"} and input is a vector, the range is set to \code{c(-0.1, 0.1)}. If \code{"default"} and input is a Bayesian model, -\code{\link[=rope_range]{rope_range()}} is used.} +depending on the number of outcome variables a vector or a list. In +models with one response, \code{range} should be a vector of length two (e.g., +\code{c(-0.1, 0.1)}). In multivariate models, \code{range} should be a list with a +numeric vectors for each response variable. Vector names should correspond +to the name of the response variables. If \code{"default"} and input is a vector, +the range is set to \code{c(-0.1, 0.1)}. If \code{"default"} and input is a Bayesian +model, \code{\link[=rope_range]{rope_range()}} is used.} \item{ci}{The Credible Interval (CI) probability, corresponding to the proportion of HDI, to use for the percentage in ROPE.} @@ -152,9 +154,7 @@ test <- equivalence_test(model) plot(test) library(emmeans) -equivalence_test( - suppressWarnings(emtrends(model, ~1, "wt", data = mtcars)) -) +equivalence_test(emtrends(model, ~1, "wt", data = mtcars)) library(brms) model <- brms::brm(mpg ~ wt + cyl, data = mtcars) diff --git a/man/estimate_density.Rd b/man/estimate_density.Rd index 001a2e712..c6460eeaf 100644 --- a/man/estimate_density.Rd +++ b/man/estimate_density.Rd @@ -121,9 +121,7 @@ model <- suppressWarnings( head(estimate_density(model)) library(emmeans) -head(estimate_density(suppressWarnings( - emtrends(model, ~1, "wt", data = mtcars) -))) +head(estimate_density(emtrends(model, ~1, "wt", data = mtcars))) # brms models # ----------------------------------------------- diff --git a/man/eti.Rd b/man/eti.Rd index 3161a8a9c..9e7e7ce62 100644 --- a/man/eti.Rd +++ b/man/eti.Rd @@ -62,7 +62,8 @@ for the output.} \value{ A data frame with following columns: \itemize{ -\item \code{Parameter} The model parameter(s), if \code{x} is a model-object. If \code{x} is a vector, this column is missing. +\item \code{Parameter} The model parameter(s), if \code{x} is a model-object. If \code{x} is a +vector, this column is missing. \item \code{CI} The probability of the credible interval. \item \code{CI_low}, \code{CI_high} The lower and upper credible interval limits for the parameters. } @@ -81,20 +82,22 @@ from each tail of the distribution and always include the median, the HDI is distributions. While this can be useful to better represent the credibility mass of a distribution, the HDI also has some limitations. See \code{\link[=spi]{spi()}} for details. -\cr \cr + The \href{https://easystats.github.io/bayestestR/articles/credible_interval.html}{\verb{95\%} or \verb{89\%} Credible Intervals (CI)} -are two reasonable ranges to characterize the uncertainty related to the estimation (see \href{https://easystats.github.io/bayestestR/articles/credible_interval.html}{here} for a discussion about the differences between these two values). -\cr +are two reasonable ranges to characterize the uncertainty related to the +estimation (see \href{https://easystats.github.io/bayestestR/articles/credible_interval.html}{here} +for a discussion about the differences between these two values). + The \verb{89\%} intervals (\code{ci = 0.89}) are deemed to be more stable than, for -instance, \verb{95\%} intervals (\cite{Kruschke, 2014}). An effective sample size +instance, \verb{95\%} intervals (\emph{Kruschke, 2014}). An effective sample size of at least 10.000 is recommended if one wants to estimate \verb{95\%} intervals -with high precision (\cite{Kruschke, 2014, p. 183ff}). Unfortunately, the +with high precision (\emph{Kruschke, 2014, p. 183ff}). Unfortunately, the default number of posterior samples for most Bayes packages (e.g., \code{rstanarm} or \code{brms}) is only 4.000 (thus, you might want to increase it when fitting your model). Moreover, 89 indicates the arbitrariness of interval limits - its only remarkable property is being the highest prime number that does not -exceed the already unstable \verb{95\%} threshold (\cite{McElreath, 2015}). -\cr +exceed the already unstable \verb{95\%} threshold (\emph{McElreath, 2015}). + However, \verb{95\%} has some \href{https://easystats.github.io/blog/posts/bayestestr_95/}{advantages too}. For instance, it shares (in the case of a normal posterior distribution) an intuitive relationship with the standard deviation and it conveys a more accurate image @@ -102,17 +105,17 @@ of the (artificial) bounds of the distribution. Also, because it is wider, it makes analyses more conservative (i.e., the probability of covering 0 is larger for the \verb{95\%} CI than for lower ranges such as \verb{89\%}), which is a good thing in the context of the reproducibility crisis. -\cr \cr + A \verb{95\%} equal-tailed interval (ETI) has \verb{2.5\%} of the distribution on either side of its limits. It indicates the 2.5th percentile and the 97.5h percentile. In symmetric distributions, the two methods of computing credible intervals, the ETI and the \link[=hdi]{HDI}, return similar results. -\cr \cr + This is not the case for skewed distributions. Indeed, it is possible that parameter values in the ETI have lower credibility (are less probable) than parameter values outside the ETI. This property seems undesirable as a summary of the credible values in a distribution. -\cr \cr + On the other hand, the ETI range does change when transformations are applied to the distribution (for instance, for a log odds scale to probabilities): the lower and higher bounds of the transformed distribution will correspond @@ -139,7 +142,7 @@ eti(model) eti(model, ci = c(0.80, 0.89, 0.95)) library(emmeans) -eti(suppressWarnings(emtrends(model, ~1, "wt", data = mtcars))) +eti(emtrends(model, ~1, "wt", data = mtcars)) library(brms) model <- brms::brm(mpg ~ wt + cyl, data = mtcars) diff --git a/man/hdi.Rd b/man/hdi.Rd index adbca70fb..59c3919e9 100644 --- a/man/hdi.Rd +++ b/man/hdi.Rd @@ -65,7 +65,8 @@ for the output.} \value{ A data frame with following columns: \itemize{ -\item \code{Parameter} The model parameter(s), if \code{x} is a model-object. If \code{x} is a vector, this column is missing. +\item \code{Parameter} The model parameter(s), if \code{x} is a model-object. If \code{x} is a +vector, this column is missing. \item \code{CI} The probability of the credible interval. \item \code{CI_low}, \code{CI_high} The lower and upper credible interval limits for the parameters. } @@ -83,20 +84,22 @@ from each tail of the distribution and always include the median, the HDI is distributions. While this can be useful to better represent the credibility mass of a distribution, the HDI also has some limitations. See \code{\link[=spi]{spi()}} for details. -\cr \cr + The \href{https://easystats.github.io/bayestestR/articles/credible_interval.html}{\verb{95\%} or \verb{89\%} Credible Intervals (CI)} -are two reasonable ranges to characterize the uncertainty related to the estimation (see \href{https://easystats.github.io/bayestestR/articles/credible_interval.html}{here} for a discussion about the differences between these two values). -\cr +are two reasonable ranges to characterize the uncertainty related to the +estimation (see \href{https://easystats.github.io/bayestestR/articles/credible_interval.html}{here} +for a discussion about the differences between these two values). + The \verb{89\%} intervals (\code{ci = 0.89}) are deemed to be more stable than, for -instance, \verb{95\%} intervals (\cite{Kruschke, 2014}). An effective sample size +instance, \verb{95\%} intervals (\emph{Kruschke, 2014}). An effective sample size of at least 10.000 is recommended if one wants to estimate \verb{95\%} intervals -with high precision (\cite{Kruschke, 2014, p. 183ff}). Unfortunately, the +with high precision (\emph{Kruschke, 2014, p. 183ff}). Unfortunately, the default number of posterior samples for most Bayes packages (e.g., \code{rstanarm} or \code{brms}) is only 4.000 (thus, you might want to increase it when fitting your model). Moreover, 89 indicates the arbitrariness of interval limits - its only remarkable property is being the highest prime number that does not -exceed the already unstable \verb{95\%} threshold (\cite{McElreath, 2015}). -\cr +exceed the already unstable \verb{95\%} threshold (\emph{McElreath, 2015}). + However, \verb{95\%} has some \href{https://easystats.github.io/blog/posts/bayestestr_95/}{advantages too}. For instance, it shares (in the case of a normal posterior distribution) an intuitive relationship with the standard deviation and it conveys a more accurate image @@ -104,17 +107,17 @@ of the (artificial) bounds of the distribution. Also, because it is wider, it makes analyses more conservative (i.e., the probability of covering 0 is larger for the \verb{95\%} CI than for lower ranges such as \verb{89\%}), which is a good thing in the context of the reproducibility crisis. -\cr \cr + A \verb{95\%} equal-tailed interval (ETI) has \verb{2.5\%} of the distribution on either side of its limits. It indicates the 2.5th percentile and the 97.5h percentile. In symmetric distributions, the two methods of computing credible intervals, the ETI and the \link[=hdi]{HDI}, return similar results. -\cr \cr + This is not the case for skewed distributions. Indeed, it is possible that parameter values in the ETI have lower credibility (are less probable) than parameter values outside the ETI. This property seems undesirable as a summary of the credible values in a distribution. -\cr \cr + On the other hand, the ETI range does change when transformations are applied to the distribution (for instance, for a log odds scale to probabilities): the lower and higher bounds of the transformed distribution will correspond @@ -158,8 +161,10 @@ bayestestR::hdi(bf, ci = c(0.80, 0.90, 0.95)) } \references{ \itemize{ -\item Kruschke, J. (2014). Doing Bayesian data analysis: A tutorial with R, JAGS, and Stan. Academic Press. -\item McElreath, R. (2015). Statistical rethinking: A Bayesian course with examples in R and Stan. Chapman and Hall/CRC. +\item Kruschke, J. (2014). Doing Bayesian data analysis: A tutorial with R, JAGS, +and Stan. Academic Press. +\item McElreath, R. (2015). Statistical rethinking: A Bayesian course with +examples in R and Stan. Chapman and Hall/CRC. } } \seealso{ diff --git a/man/mediation.Rd b/man/mediation.Rd index fd28f5a6b..de4d9643f 100644 --- a/man/mediation.Rd +++ b/man/mediation.Rd @@ -63,9 +63,10 @@ additional predictor), \code{y}, is not transformed. Then we could use \code{"mode"} or \code{"all"}.} \item{ci}{Value or vector of probability of the CI (between 0 and 1) -to be estimated. Default to \code{.95} (\verb{95\%}).} +to be estimated. Default to \code{0.95} (\verb{95\%}).} -\item{method}{Can be \link[=eti]{'ETI'} (default), \link[=hdi]{'HDI'}, \link[=bci]{'BCI'}, \link[=spi]{'SPI'} or \link[=si]{'SI'}.} +\item{method}{Can be \link[=eti]{"ETI"} (default), \link[=hdi]{"HDI"}, \link[=bci]{"BCI"}, +\link[=spi]{"SPI"} or \link[=si]{"SI"}.} } \value{ A data frame with direct, indirect, mediator and @@ -89,18 +90,18 @@ samples from \code{mediator} of the outcome model), \emph{indirect effect} value of sums of posterior samples used for the direct and indirect effect). The \emph{proportion mediated} is the indirect effect divided by the total effect. -\cr \cr + For all values, the \verb{89\%} credible intervals are calculated by default. Use \code{ci} to calculate a different interval. -\cr \cr + The arguments \code{treatment} and \code{mediator} do not necessarily need to be specified. If missing, \code{mediation()} tries to find the treatment and mediator variable automatically. If this does not work, specify these variables. -\cr \cr + The direct effect is also called \emph{average direct effect} (ADE), the indirect effect is also called \emph{average causal mediation effects} -(ACME). See also \cite{Tingley et al. 2014} and \cite{Imai et al. 2010}. +(ACME). See also \emph{Tingley et al. 2014} and \emph{Imai et al. 2010}. } \note{ There is an \code{as.data.frame()} method that returns the posterior @@ -129,7 +130,7 @@ f2 <- bf(depress2 ~ treat + job_seek + econ_hard + sex + age) m2 <- brm(f1 + f2 + set_rescor(FALSE), data = jobs, cores = 4, refresh = 0) # Fit Bayesian mediation model in rstanarm -m3 <- stan_mvmer( +m3 <- suppressWarnings(stan_mvmer( list( job_seek ~ treat + econ_hard + sex + age + (1 | occp), depress2 ~ treat + job_seek + econ_hard + sex + age + (1 | occp) @@ -137,7 +138,7 @@ m3 <- stan_mvmer( data = jobs, cores = 4, refresh = 0 -) +)) summary(m1) mediation(m2, centrality = "mean", ci = 0.95) diff --git a/man/p_map.Rd b/man/p_map.Rd index 1dad381b8..9e9e625fe 100644 --- a/man/p_map.Rd +++ b/man/p_map.Rd @@ -91,7 +91,9 @@ model <- suppressWarnings( p_map(model) library(emmeans) -p_map(emtrends(model, ~1, "wt", data = mtcars)) +p_map(suppressWarnings( + emtrends(model, ~1, "wt", data = mtcars) +)) library(brms) model <- brms::brm(mpg ~ wt + cyl, data = mtcars) diff --git a/man/p_rope.Rd b/man/p_rope.Rd index 169ad4e91..99515923c 100644 --- a/man/p_rope.Rd +++ b/man/p_rope.Rd @@ -31,17 +31,19 @@ p_rope(x, ...) ) } \arguments{ -\item{x}{Vector representing a posterior distribution. Can also be a \code{stanreg} or \code{brmsfit} model.} +\item{x}{Vector representing a posterior distribution. Can also be a +\code{stanreg} or \code{brmsfit} model.} \item{...}{Currently not used.} \item{range}{ROPE's lower and higher bounds. Should be \code{"default"} or -depending on the number of outcome variables a vector or a list. In models with one response, -\code{range} should be a vector of length two (e.g., \code{c(-0.1, 0.1)}). In -multivariate models, \code{range} should be a list with a numeric vectors for -each response variable. Vector names should correspond to the name of the response -variables. If \code{"default"} and input is a vector, the range is set to \code{c(-0.1, 0.1)}. If \code{"default"} and input is a Bayesian model, -\code{\link[=rope_range]{rope_range()}} is used.} +depending on the number of outcome variables a vector or a list. In +models with one response, \code{range} should be a vector of length two (e.g., +\code{c(-0.1, 0.1)}). In multivariate models, \code{range} should be a list with a +numeric vectors for each response variable. Vector names should correspond +to the name of the response variables. If \code{"default"} and input is a vector, +the range is set to \code{c(-0.1, 0.1)}. If \code{"default"} and input is a Bayesian +model, \code{\link[=rope_range]{rope_range()}} is used.} \item{effects}{Should results for fixed effects, random effects or both be returned? Only applies to mixed models. May be abbreviated.} diff --git a/man/p_significance.Rd b/man/p_significance.Rd index ce59b5bcc..2564e744b 100644 --- a/man/p_significance.Rd +++ b/man/p_significance.Rd @@ -33,7 +33,8 @@ p_significance(x, ...) ) } \arguments{ -\item{x}{Vector representing a posterior distribution. Can also be a \code{stanreg} or \code{brmsfit} model.} +\item{x}{Vector representing a posterior distribution. Can also be a +\code{stanreg} or \code{brmsfit} model.} \item{...}{Currently not used.} diff --git a/man/rope.Rd b/man/rope.Rd index 537e00510..3afcfb79a 100644 --- a/man/rope.Rd +++ b/man/rope.Rd @@ -37,17 +37,19 @@ rope(x, ...) ) } \arguments{ -\item{x}{Vector representing a posterior distribution. Can also be a \code{stanreg} or \code{brmsfit} model.} +\item{x}{Vector representing a posterior distribution. Can also be a +\code{stanreg} or \code{brmsfit} model.} \item{...}{Currently not used.} \item{range}{ROPE's lower and higher bounds. Should be \code{"default"} or -depending on the number of outcome variables a vector or a list. In models with one response, -\code{range} should be a vector of length two (e.g., \code{c(-0.1, 0.1)}). In -multivariate models, \code{range} should be a list with a numeric vectors for -each response variable. Vector names should correspond to the name of the response -variables. If \code{"default"} and input is a vector, the range is set to \code{c(-0.1, 0.1)}. If \code{"default"} and input is a Bayesian model, -\code{\link[=rope_range]{rope_range()}} is used.} +depending on the number of outcome variables a vector or a list. In +models with one response, \code{range} should be a vector of length two (e.g., +\code{c(-0.1, 0.1)}). In multivariate models, \code{range} should be a list with a +numeric vectors for each response variable. Vector names should correspond +to the name of the response variables. If \code{"default"} and input is a vector, +the range is set to \code{c(-0.1, 0.1)}. If \code{"default"} and input is a Bayesian +model, \code{\link[=rope_range]{rope_range()}} is used.} \item{ci}{The Credible Interval (CI) probability, corresponding to the proportion of HDI, to use for the percentage in ROPE.} @@ -71,30 +73,36 @@ filtered by default, so only parameters that typically appear in the for the output.} } \description{ -Compute the proportion of the HDI (default to the \verb{89\%} HDI) of a posterior distribution that lies within a region of practical equivalence. +Compute the proportion of the HDI (default to the \verb{89\%} HDI) of a posterior +distribution that lies within a region of practical equivalence. } -\details{ -\subsection{ROPE}{ +\note{ +There is also a \href{https://easystats.github.io/see/articles/bayestestR.html}{\code{plot()}-method} implemented in the \href{https://easystats.github.io/see/}{\pkg{see}-package}. +} +\section{ROPE}{ + Statistically, the probability of a posterior distribution of being different from 0 does not make much sense (the probability of a single value null hypothesis in a continuous distribution is 0). Therefore, the idea underlining ROPE is to let the user define an area around the null value enclosing values that are \emph{equivalent to the null} value for practical -purposes (\cite{Kruschke 2010, 2011, 2014}). -\cr \cr +purposes (\emph{Kruschke 2010, 2011, 2014}). + Kruschke (2018) suggests that such null value could be set, by default, to the -0.1 to 0.1 range of a standardized parameter (negligible effect size according to Cohen, 1988). This could be generalized: For instance, for linear models, the ROPE could be set as \verb{0 +/- .1 * sd(y)}. This ROPE range can be automatically computed for models using the \link{rope_range} function. -\cr \cr + Kruschke (2010, 2011, 2014) suggests using the proportion of the \verb{95\%} (or \verb{89\%}, considered more stable) \link[=hdi]{HDI} that falls within the ROPE as an index for "null-hypothesis" testing (as understood under the Bayesian framework, see \code{\link[=equivalence_test]{equivalence_test()}}). } -\subsection{Sensitivity to parameter's scale}{ + +\section{Sensitivity to parameter's scale}{ + It is important to consider the unit (i.e., the scale) of the predictors when using an index based on the ROPE, as the correct interpretation of the ROPE as representing a region of practical equivalence to zero is dependent @@ -103,7 +111,9 @@ the unit of its parameter. In other words, as the ROPE represents a fixed portion of the response's scale, its proximity with a coefficient depends on the scale of the coefficient itself. } -\subsection{Multicollinearity: Non-independent covariates}{ + +\section{Multicollinearity - Non-independent covariates}{ + When parameters show strong correlations, i.e. when covariates are not independent, the joint parameter distributions may shift towards or away from the ROPE. Collinearity invalidates ROPE and hypothesis @@ -112,63 +122,80 @@ on independence. Most problematic are parameters that only have partial overlap with the ROPE region. In case of collinearity, the (joint) distributions of these parameters may either get an increased or decreased ROPE, which means that inferences based on \code{rope()} are inappropriate -(\cite{Kruschke 2014, 340f}). -\cr \cr +(\emph{Kruschke 2014, 340f}). + \code{rope()} performs a simple check for pairwise correlations between parameters, but as there can be collinearity between more than two variables, a first step to check the assumptions of this hypothesis testing is to look at different pair plots. An even more sophisticated check is the projection -predictive variable selection (\cite{Piironen and Vehtari 2017}). +predictive variable selection (\emph{Piironen and Vehtari 2017}). } -\subsection{Strengths and Limitations}{ -\strong{Strengths:} Provides information related to the practical relevance of the effects. -\cr \cr -\strong{Limitations:} A ROPE range needs to be arbitrarily defined. Sensitive to the scale (the unit) of the predictors. Not sensitive to highly significant effects. -} -} -\note{ -There is also a \href{https://easystats.github.io/see/articles/bayestestR.html}{\code{plot()}-method} implemented in the \href{https://easystats.github.io/see/}{\pkg{see}-package}. + +\section{Strengths and Limitations}{ + +\strong{Strengths:} Provides information related to the practical relevance of +the effects. + +\strong{Limitations:} A ROPE range needs to be arbitrarily defined. Sensitive to +the scale (the unit) of the predictors. Not sensitive to highly significant +effects. } + \examples{ library(bayestestR) rope(x = rnorm(1000, 0, 0.01), range = c(-0.1, 0.1)) rope(x = rnorm(1000, 0, 1), range = c(-0.1, 0.1)) rope(x = rnorm(1000, 1, 0.01), range = c(-0.1, 0.1)) -rope(x = rnorm(1000, 1, 1), ci = c(.90, .95)) +rope(x = rnorm(1000, 1, 1), ci = c(0.90, 0.95)) \dontrun{ library(rstanarm) -model <- stan_glm(mpg ~ wt + gear, data = mtcars, chains = 2, iter = 200, refresh = 0) +model <- suppressWarnings( + stan_glm(mpg ~ wt + gear, data = mtcars, chains = 2, iter = 200, refresh = 0) +) rope(model) -rope(model, ci = c(.90, .95)) +rope(model, ci = c(0.90, 0.95)) library(emmeans) -rope(emtrends(model, ~1, "wt"), ci = c(.90, .95)) +rope(emtrends(model, ~1, "wt"), ci = c(0.90, 0.95)) library(brms) -model <- brms::brm(mpg ~ wt + cyl, data = mtcars) +model <- brm(mpg ~ wt + cyl, data = mtcars) rope(model) -rope(model, ci = c(.90, .95)) +rope(model, ci = c(0.90, 0.95)) library(brms) -model <- brms::brm(brms::mvbind(mpg, disp) ~ wt + cyl, data = mtcars) +model <- brm( + bf(mvbind(mpg, disp) ~ wt + cyl) + set_rescor(rescor = TRUE), + data = mtcars +) rope(model) -rope(model, ci = c(.90, .95)) +rope(model, ci = c(0.90, 0.95)) library(BayesFactor) bf <- ttestBF(x = rnorm(100, 1, 1)) rope(bf) -rope(bf, ci = c(.90, .95)) +rope(bf, ci = c(0.90, 0.95)) } } \references{ \itemize{ \item Cohen, J. (1988). Statistical power analysis for the behavioural sciences. -\item Kruschke, J. K. (2010). What to believe: Bayesian methods for data analysis. Trends in cognitive sciences, 14(7), 293-300. \doi{10.1016/j.tics.2010.05.001}. -\item Kruschke, J. K. (2011). Bayesian assessment of null values via parameter estimation and model comparison. Perspectives on Psychological Science, 6(3), 299-312. \doi{10.1177/1745691611406925}. -\item Kruschke, J. K. (2014). Doing Bayesian data analysis: A tutorial with R, JAGS, and Stan. Academic Press. \doi{10.1177/2515245918771304}. -\item Kruschke, J. K. (2018). Rejecting or accepting parameter values in Bayesian estimation. Advances in Methods and Practices in Psychological Science, 1(2), 270-280. \doi{10.1177/2515245918771304}. -\item Makowski D, Ben-Shachar MS, Chen SHA, Lüdecke D (2019) Indices of Effect Existence and Significance in the Bayesian Framework. Frontiers in Psychology 2019;10:2767. \doi{10.3389/fpsyg.2019.02767} -\item Piironen, J., & Vehtari, A. (2017). Comparison of Bayesian predictive methods for model selection. Statistics and Computing, 27(3), 711–735. \doi{10.1007/s11222-016-9649-y} +\item Kruschke, J. K. (2010). What to believe: Bayesian methods for data analysis. +Trends in cognitive sciences, 14(7), 293-300. \doi{10.1016/j.tics.2010.05.001}. +\item Kruschke, J. K. (2011). Bayesian assessment of null values via parameter +estimation and model comparison. Perspectives on Psychological Science, +6(3), 299-312. \doi{10.1177/1745691611406925}. +\item Kruschke, J. K. (2014). Doing Bayesian data analysis: A tutorial with R, +JAGS, and Stan. Academic Press. \doi{10.1177/2515245918771304}. +\item Kruschke, J. K. (2018). Rejecting or accepting parameter values in Bayesian +estimation. Advances in Methods and Practices in Psychological Science, +1(2), 270-280. \doi{10.1177/2515245918771304}. +\item Makowski D, Ben-Shachar MS, Chen SHA, Lüdecke D (2019) Indices of Effect +Existence and Significance in the Bayesian Framework. Frontiers in +Psychology 2019;10:2767. \doi{10.3389/fpsyg.2019.02767} +\item Piironen, J., & Vehtari, A. (2017). Comparison of Bayesian predictive +methods for model selection. Statistics and Computing, 27(3), 711–735. +\doi{10.1007/s11222-016-9649-y} } } diff --git a/man/rope_range.Rd b/man/rope_range.Rd index b21ae766e..65cbed675 100644 --- a/man/rope_range.Rd +++ b/man/rope_range.Rd @@ -60,16 +60,18 @@ ROPE limits, but it is strongly advised to specify it manually. \examples{ \dontrun{ if (require("rstanarm")) { - model <- stan_glm( + model <- suppressWarnings(stan_glm( mpg ~ wt + gear, data = mtcars, chains = 2, iter = 200, refresh = 0 - ) + )) rope_range(model) - model <- stan_glm(vs ~ mpg, data = mtcars, family = "binomial", refresh = 0) + model <- suppressWarnings( + stan_glm(vs ~ mpg, data = mtcars, family = "binomial", refresh = 0) + ) rope_range(model) } diff --git a/man/sexit.Rd b/man/sexit.Rd index 4fe77723a..8b0f8029a 100644 --- a/man/sexit.Rd +++ b/man/sexit.Rd @@ -122,10 +122,10 @@ s print(s, summary = TRUE) if (require("rstanarm")) { - model <- rstanarm::stan_glm(mpg ~ wt * cyl, + model <- suppressWarnings(rstanarm::stan_glm(mpg ~ wt * cyl, data = mtcars, iter = 400, refresh = 0 - ) + )) s <- sexit(model) s print(s, summary = TRUE) diff --git a/man/sexit_thresholds.Rd b/man/sexit_thresholds.Rd index c4f2cfe17..d80a3d04c 100644 --- a/man/sexit_thresholds.Rd +++ b/man/sexit_thresholds.Rd @@ -7,7 +7,8 @@ sexit_thresholds(x, ...) } \arguments{ -\item{x}{Vector representing a posterior distribution. Can also be a \code{stanreg} or \code{brmsfit} model.} +\item{x}{Vector representing a posterior distribution. Can also be a +\code{stanreg} or \code{brmsfit} model.} \item{...}{Currently not used.} } @@ -22,16 +23,18 @@ information. sexit_thresholds(rnorm(1000)) \dontrun{ if (require("rstanarm")) { - model <- stan_glm( + model <- suppressWarnings(stan_glm( mpg ~ wt + gear, data = mtcars, chains = 2, iter = 200, refresh = 0 - ) + )) sexit_thresholds(model) - model <- stan_glm(vs ~ mpg, data = mtcars, family = "binomial", refresh = 0) + model <- suppressWarnings( + stan_glm(vs ~ mpg, data = mtcars, family = "binomial", refresh = 0) + ) sexit_thresholds(model) } diff --git a/man/si.Rd b/man/si.Rd index b7d42c5eb..5d2e1f495 100644 --- a/man/si.Rd +++ b/man/si.Rd @@ -83,7 +83,7 @@ for the output.} } \value{ A data frame containing the lower and upper bounds of the SI. -\cr + Note that if the level of requested support is higher than observed in the data, the interval will be \verb{[NA,NA]}. } @@ -93,30 +93,34 @@ than average, by some degree \emph{k}; these are values of the parameter that ar updating factor greater or equal than \emph{k}. From the perspective of the Savage-Dickey Bayes factor, testing against a point null hypothesis for any value within the support interval will yield a Bayes factor smaller than \emph{1/k}. -\cr \cr -\strong{For more info, in particular on specifying correct priors for factors with more than 2 levels, -see \href{https://easystats.github.io/bayestestR/articles/bayes_factors.html}{the Bayes factors vignette}.} } \details{ +\strong{For more info, in particular on specifying correct priors for factors with more than 2 levels, +see \href{https://easystats.github.io/bayestestR/articles/bayes_factors.html}{the Bayes factors vignette}.} + This method is used to compute support intervals based on prior and posterior distributions. For the computation of support intervals, the model priors must be proper priors (at the very least they should be \emph{not flat}, and it is preferable that they be \emph{informative} - note that by default, \code{brms::brm()} uses flat priors for fixed-effects; see example below). +} +\note{ +There is also a \href{https://easystats.github.io/see/articles/bayestestR.html}{\code{plot()}-method} implemented in the \href{https://easystats.github.io/see/}{\pkg{see}-package}. +} +\section{Choosing a value of \code{BF}}{ -\subsection{Choosing a value of \code{BF}}{ -The choice of \code{BF} (the level of support) depends on what we want our interval to represent: +The choice of \code{BF} (the level of support) depends on what we want our interval +to represent: \itemize{ \item A \code{BF} = 1 contains values whose credibility is not decreased by observing the data. \item A \code{BF} > 1 contains values who received more impressive support from the data. -\item A \code{BF} < 1 contains values whose credibility has \emph{not} been impressively decreased by observing the data. -Testing against values outside this interval will produce a Bayes factor larger than 1/\code{BF} in support of -the alternative. E.g., if an SI (BF = 1/3) excludes 0, the Bayes factor against the point-null will be larger than 3. +\item A \code{BF} < 1 contains values whose credibility has \emph{not} been impressively +decreased by observing the data. Testing against values outside this interval +will produce a Bayes factor larger than 1/\code{BF} in support of the alternative. +E.g., if an SI (BF = 1/3) excludes 0, the Bayes factor against the point-null +will be larger than 3. } } -} -\note{ -There is also a \href{https://easystats.github.io/see/articles/bayestestR.html}{\code{plot()}-method} implemented in the \href{https://easystats.github.io/see/}{\pkg{see}-package}. -} + \section{Setting the correct \code{prior}}{ For the computation of Bayes factors, the model priors must be proper priors @@ -153,23 +157,23 @@ It is important to provide the correct \code{prior} for meaningful results. library(bayestestR) prior <- distribution_normal(1000, mean = 0, sd = 1) -posterior <- distribution_normal(1000, mean = .5, sd = .3) +posterior <- distribution_normal(1000, mean = 0.5, sd = 0.3) -si(posterior, prior) +si(posterior, prior, verbose = FALSE) \dontrun{ # rstanarm models # --------------- library(rstanarm) contrasts(sleep$group) <- contr.equalprior_pairs # see vignette stan_model <- stan_lmer(extra ~ group + (1 | ID), data = sleep) -si(stan_model) -si(stan_model, BF = 3) +si(stan_model, verbose = FALSE) +si(stan_model, BF = 3, verbose = FALSE) # emmGrid objects # --------------- library(emmeans) group_diff <- pairs(emmeans(stan_model, ~group)) -si(group_diff, prior = stan_model) +si(group_diff, prior = stan_model, verbose = FALSE) # brms models # ----------- @@ -184,12 +188,13 @@ brms_model <- suppressWarnings(brm(extra ~ group + (1 | ID), prior = my_custom_priors, refresh = 0 )) -si(brms_model) +si(brms_model, verbose = FALSE) } \dontshow{\}) # examplesIf} } \references{ -Wagenmakers, E., Gronau, Q. F., Dablander, F., & Etz, A. (2018, November 22). The Support Interval. \doi{10.31234/osf.io/zwnxb} +Wagenmakers, E., Gronau, Q. F., Dablander, F., & Etz, A. (2018, November 22). +The Support Interval. \doi{10.31234/osf.io/zwnxb} } \seealso{ Other ci: diff --git a/man/simulate_prior.Rd b/man/simulate_prior.Rd index ed3c5c522..8d4d0427b 100644 --- a/man/simulate_prior.Rd +++ b/man/simulate_prior.Rd @@ -20,7 +20,9 @@ Transforms priors information to actual distributions. \dontrun{ library(bayestestR) if (require("rstanarm")) { - model <- stan_glm(mpg ~ wt + am, data = mtcars, chains = 1, refresh = 0) + model <- suppressWarnings( + stan_glm(mpg ~ wt + am, data = mtcars, chains = 1, refresh = 0) + ) simulate_prior(model) } } diff --git a/man/simulate_simpson.Rd b/man/simulate_simpson.Rd index 07014c250..5c5590e95 100644 --- a/man/simulate_simpson.Rd +++ b/man/simulate_simpson.Rd @@ -13,14 +13,12 @@ simulate_simpson( ) } \arguments{ -\item{n}{The number of observations for each group to be generated (minimum -4).} +\item{n}{The number of observations for each group to be generated (minimum 4).} \item{r}{A value or vector corresponding to the desired correlation coefficients.} -\item{groups}{Number of groups (groups can be participants, clusters, -anything).} +\item{groups}{Number of groups (groups can be participants, clusters, anything).} \item{difference}{Difference between groups.} diff --git a/man/spi.Rd b/man/spi.Rd index 085be3a30..eb18b761f 100644 --- a/man/spi.Rd +++ b/man/spi.Rd @@ -62,7 +62,8 @@ for the output.} \value{ A data frame with following columns: \itemize{ -\item \code{Parameter} The model parameter(s), if \code{x} is a model-object. If \code{x} is a vector, this column is missing. +\item \code{Parameter} The model parameter(s), if \code{x} is a model-object. If \code{x} is a +vector, this column is missing. \item \code{CI} The probability of the credible interval. \item \code{CI_low}, \code{CI_high} The lower and upper credible interval limits for the parameters. } @@ -98,7 +99,9 @@ spi(df) spi(df, ci = c(0.80, 0.89, 0.95)) \dontrun{ library(rstanarm) -model <- stan_glm(mpg ~ wt + gear, data = mtcars, chains = 2, iter = 200, refresh = 0) +model <- suppressWarnings( + stan_glm(mpg ~ wt + gear, data = mtcars, chains = 2, iter = 200, refresh = 0) +) spi(model) } \dontshow{\}) # examplesIf} diff --git a/man/unupdate.Rd b/man/unupdate.Rd index 2d38361c8..85e6049dd 100644 --- a/man/unupdate.Rd +++ b/man/unupdate.Rd @@ -36,7 +36,8 @@ As posteriors are priors that have been updated after observing some data, the goal of this function is to un-update the posteriors to obtain models representing the priors. These models can then be used to examine the prior predictive distribution, or to compare priors with posteriors. -\cr\cr +} +\details{ This function in used internally to compute Bayes factors. } \keyword{internal} diff --git a/man/weighted_posteriors.Rd b/man/weighted_posteriors.Rd index 2d174ad37..77d154fc9 100644 --- a/man/weighted_posteriors.Rd +++ b/man/weighted_posteriors.Rd @@ -94,12 +94,12 @@ example, the parameter \code{A} plays a different role in the model \code{Y ~ A subtracting for continuous variables, and effects coding via \code{contr.sum} or orthonormal coding via \code{\link{contr.equalprior_pairs}} for factors) can reduce this issue. In any case you should be mindful of this issue. -\cr\cr + See \code{\link[=bayesfactor_models]{bayesfactor_models()}} details for more info on passed models. -\cr\cr + Note that for \code{BayesFactor} models, posterior samples cannot be generated from intercept only models. -\cr\cr + This function is similar in function to \code{brms::posterior_average}. } \note{ @@ -109,22 +109,22 @@ frame. } \examples{ \donttest{ -if (require("rstanarm") && require("see")) { - stan_m0 <- stan_glm(extra ~ 1, +if (require("rstanarm") && require("see") && interactive()) { + stan_m0 <- suppressWarnings(stan_glm(extra ~ 1, data = sleep, family = gaussian(), refresh = 0, diagnostic_file = file.path(tempdir(), "df0.csv") - ) + )) - stan_m1 <- stan_glm(extra ~ group, + stan_m1 <- suppressWarnings(stan_glm(extra ~ group, data = sleep, family = gaussian(), refresh = 0, diagnostic_file = file.path(tempdir(), "df1.csv") - ) + )) - res <- weighted_posteriors(stan_m0, stan_m1) + res <- weighted_posteriors(stan_m0, stan_m1, verbose = FALSE) plot(eti(res)) } @@ -133,39 +133,42 @@ if (require("rstanarm") && require("see")) { if (require("BayesFactor")) { extra_sleep <- ttestBF(formula = extra ~ group, data = sleep) - wp <- weighted_posteriors(extra_sleep) + wp <- weighted_posteriors(extra_sleep, verbose = FALSE) - describe_posterior(extra_sleep, test = NULL) - describe_posterior(wp$delta, test = NULL) # also considers the null + describe_posterior(extra_sleep, test = NULL, verbose = FALSE) + # also considers the null + describe_posterior(wp$delta, test = NULL, verbose = FALSE) } ## weighted prediction distributions via data.frames -if (require("rstanarm")) { - m0 <- stan_glm( +if (require("rstanarm") && interactive()) { + m0 <- suppressWarnings(stan_glm( mpg ~ 1, data = mtcars, family = gaussian(), diagnostic_file = file.path(tempdir(), "df0.csv"), refresh = 0 - ) + )) - m1 <- stan_glm( + m1 <- suppressWarnings(stan_glm( mpg ~ carb, data = mtcars, family = gaussian(), diagnostic_file = file.path(tempdir(), "df1.csv"), refresh = 0 - ) + )) # Predictions: pred_m0 <- data.frame(posterior_predict(m0)) pred_m1 <- data.frame(posterior_predict(m1)) - BFmods <- bayesfactor_models(m0, m1) + BFmods <- bayesfactor_models(m0, m1, verbose = FALSE) - wp <- weighted_posteriors(pred_m0, pred_m1, - prior_odds = as.numeric(BFmods)[2] + wp <- weighted_posteriors( + pred_m0, pred_m1, + prior_odds = as.numeric(BFmods)[2], + verbose = FALSE ) # look at first 5 prediction intervals diff --git a/tests/testthat/test-bayesian_as_frequentist.R b/tests/testthat/test-bayesian_as_frequentist.R new file mode 100644 index 000000000..11876d060 --- /dev/null +++ b/tests/testthat/test-bayesian_as_frequentist.R @@ -0,0 +1,29 @@ +test_that("rstanarm to freq", { + skip_on_cran() + skip_if_offline() + skip_if_not_or_load_if_installed("rstanarm") + skip_if_not_or_load_if_installed("httr") + + set.seed(333) + m <- insight::download_model("stanreg_glm_1") + m1 <- glm(vs ~ wt, data = mtcars, family = "binomial") + m2 <- convert_bayesian_as_frequentist(m) + + expect_equal(coef(m1), coef(m2), tolerance = 1e-3) +}) + + +test_that("rstanarm to freq", { + skip_on_cran() + skip_if_offline() + skip_if_not_or_load_if_installed("rstanarm") + skip_if_not_or_load_if_installed("lme4") + skip_if_not_or_load_if_installed("httr") + + set.seed(333) + m <- insight::download_model("stanreg_lmerMod_1") + m1 <- lme4::lmer(wt ~ cyl + (1 | gear), data = mtcars) + m2 <- convert_bayesian_as_frequentist(m) + + expect_equal(lme4::fixef(m1), lme4::fixef(m2), tolerance = 1e-3) +}) diff --git a/tests/testthat/test-blavaan.R b/tests/testthat/test-blavaan.R index b856cf705..cf30057c1 100644 --- a/tests/testthat/test-blavaan.R +++ b/tests/testthat/test-blavaan.R @@ -43,70 +43,70 @@ test_that("blavaan, all", { x <- point_estimate(bfit, centrality = "all", dispersion = TRUE) expect_true(all(c("Median", "MAD", "Mean", "SD", "MAP", "Component") %in% colnames(x))) - expect_equal(nrow(x), 14) + expect_identical(nrow(x), 14L) x <- eti(bfit) - expect_equal(nrow(x), 14) + expect_identical(nrow(x), 14L) x <- hdi(bfit) - expect_equal(nrow(x), 14) + expect_identical(nrow(x), 14L) x <- p_direction(bfit) - expect_equal(nrow(x), 14) + expect_identical(nrow(x), 14L) - x <- rope(bfit, range = c(-.1, .1)) - expect_equal(nrow(x), 14) + x <- rope(bfit, range = c(-0.1, 0.1)) + expect_identical(nrow(x), 14L) - x <- p_rope(bfit, range = c(-.1, .1)) - expect_equal(nrow(x), 14) + x <- p_rope(bfit, range = c(-0.1, 0.1)) + expect_identical(nrow(x), 14L) x <- p_map(bfit) - expect_equal(nrow(x), 14) + expect_identical(nrow(x), 14L) - x <- p_significance(bfit, threshold = c(-.1, .1)) - expect_equal(nrow(x), 14) + x <- p_significance(bfit, threshold = c(-0.1, 0.1)) + expect_identical(nrow(x), 14L) - x <- equivalence_test(bfit, range = c(-.1, .1)) - expect_equal(nrow(x), 14) + x <- equivalence_test(bfit, range = c(-0.1, 0.1)) + expect_identical(nrow(x), 14L) x <- estimate_density(bfit) - expect_equal(length(unique(x$Parameter)), 14) + expect_length(unique(x$Parameter), 14) ## Bayes factors ---- expect_warning(bayesfactor_models(bfit, bfit2)) x <- suppressWarnings(bayesfactor_models(bfit, bfit2)) - expect_true(x$log_BF[2] < 0) + expect_lt(x$log_BF[2], 0) expect_warning(weighted_posteriors(bfit, bfit2)) x <- suppressWarnings(weighted_posteriors(bfit, bfit2)) - expect_equal(ncol(x), 14) + expect_identical(ncol(x), 14L) # bfit_prior <- unupdate(bfit) # capture.output(x <- expect_warning(bayesfactor_parameters(bfit, prior = bfit_prior))) - # expect_equal(nrow(x), 14) + # expect_identical(nrow(x), 14L) # # x <- expect_warning(si(bfit, prior = bfit_prior)) - # expect_equal(nrow(x), 14) + # expect_identical(nrow(x), 14L) # # ## Prior/posterior checks ---- # suppressWarnings(x <- check_prior(bfit)) # expect_equal(nrow(x), 13) # # x <- check_prior(bfit, simulate_priors = FALSE) - # expect_equal(nrow(x), 14) + # expect_identical(nrow(x), 14L) x <- diagnostic_posterior(bfit) - expect_equal(nrow(x), 14) + expect_identical(nrow(x), 14L) x <- simulate_prior(bfit) - expect_equal(ncol(x), 13) + expect_identical(ncol(x), 13L) # YES this is 13! We have two parameters with the same prior. x <- describe_prior(bfit) - expect_equal(nrow(x), 13) + expect_identical(nrow(x), 13L) # YES this is 13! We have two parameters with the same prior. - # x <- describe_posterior(bfit, test = "all", rope_range = c(-.1, .1)) - # expect_equal(nrow(x), 14) + # x <- describe_posterior(bfit, test = "all", rope_range = c(-0.1, 0.1)) + # expect_identical(nrow(x), 14L) }) diff --git a/tests/testthat/test-ci.R b/tests/testthat/test-ci.R index 9616b3d18..c699cebc1 100644 --- a/tests/testthat/test-ci.R +++ b/tests/testthat/test-ci.R @@ -1,11 +1,19 @@ test_that("ci", { - skip_if_offline() - skip_if_not_or_load_if_installed("rstanarm") - skip_if_not_or_load_if_installed("httr") - skip_if_not_or_load_if_installed("brms") + skip_on_os(c("mac", "linux")) + skip_if_not_or_load_if_installed("quadprog") + set.seed(123) + x <- rnorm(1000, 3, 2) + expect_error(ci(x, method = "FDI"), regex = "`method` should be 'ETI'") + out <- capture.output(print(ci(x, method = "SPI"))) + expect_identical(out, "95% SPI: [-1.16, 6.76]") + out <- capture.output(print(ci(x, method = "BCI"))) + expect_identical(out, "95% ETI: [-0.88, 7.08]") +}) + - expect_equal(ci(distribution_normal(1000), ci = .90)$CI_low[1], -1.6361, tolerance = 0.02) - expect_equal(nrow(ci(distribution_normal(1000), ci = c(.80, .90, .95))), 3, tolerance = 0.01) +test_that("ci", { + expect_equal(ci(distribution_normal(1000), ci = 0.90)$CI_low[1], -1.6361, tolerance = 0.02) + expect_equal(nrow(ci(distribution_normal(1000), ci = c(0.80, 0.90, 0.95))), 3, tolerance = 0.01) expect_equal(ci(distribution_normal(1000), ci = 1)$CI_low[1], -3.29, tolerance = 0.02) # expect_equal(length(capture.output(print(ci(distribution_normal(1000)))))) # expect_equal(length(capture.output(print(ci(distribution_normal(1000), ci = c(.80, .90)))))) @@ -17,12 +25,11 @@ test_that("ci", { x <- data.frame(replicate(4, rnorm(100))) x <- ci(x, ci = c(0.68, 0.89, 0.95)) a <- datawizard::reshape_ci(x) - expect_equal(c(nrow(x), ncol(x)), c(12, 4)) + expect_identical(c(nrow(x), ncol(x)), c(12L, 4L)) expect_true(all(datawizard::reshape_ci(a) == x)) }) - test_that("ci", { skip_if_offline() skip_if_not_or_load_if_installed("rstanarm") diff --git a/tests/testthat/test-overlap.R b/tests/testthat/test-overlap.R index ee03e43bf..f7caae88f 100644 --- a/tests/testthat/test-overlap.R +++ b/tests/testthat/test-overlap.R @@ -4,4 +4,6 @@ test_that("overlap", { y <- distribution_normal(1000, 0, 1) expect_equal(as.numeric(overlap(x, y)), 0.185, tolerance = 0.01) + out <- capture.output(print(overlap(x, y))) + expect_identical(out, c("# Overlap", "", "18.6%")) })