From d8c6e0897965d301f9bd52d75a13321646111ee3 Mon Sep 17 00:00:00 2001 From: Daniel Date: Wed, 27 Sep 2023 13:27:56 +0200 Subject: [PATCH] Differences between get_predicted and get_predicted_ci for mixed models (#814) * Differences between get_predicted and get_predicted_ci for mixed models Fixes #797 * news, version * docs * this is intentional, turn styler off * typos --- DESCRIPTION | 2 +- NEWS.md | 9 ++++++ R/get_predicted.R | 11 ++++--- R/get_predicted_ci.R | 35 ++++++++++++++++++++--- man/get_predicted.Rd | 7 +++-- man/get_predicted_ci.Rd | 31 ++++++++++++++++++-- man/get_transformation.Rd | 8 +++--- tests/testthat/test-find_transformation.R | 2 ++ 8 files changed, 88 insertions(+), 17 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index dceb5f902..030e28120 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Type: Package Package: insight Title: Easy Access to Model Information for Various Model Objects -Version: 0.19.5.4 +Version: 0.19.5.5 Authors@R: c(person(given = "Daniel", family = "Lüdecke", diff --git a/NEWS.md b/NEWS.md index c6b7644fb..dd837ea63 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,9 +1,18 @@ # insight 0.19.6 +## General + +* Improved documentation for `get_predicted_ci()`. + ## Changes to functions * `model_info()` now recognized ordered beta families. +## Bug fixes + +* `find_transformation()` better detects power-transformation of the response + variable. + # insight 0.19.5 ## Bug fixes diff --git a/R/get_predicted.R b/R/get_predicted.R index 830cda5dc..ce69797d1 100644 --- a/R/get_predicted.R +++ b/R/get_predicted.R @@ -12,11 +12,14 @@ #' with lots of caveats and complications. Read the 'Details' section for more #' information. #' -#' `get_predicted_ci()` returns the confidence (or prediction) interval (CI) +#' [`get_predicted_ci()`] returns the confidence (or prediction) interval (CI) #' associated with predictions made by a model. This function can be called #' separately on a vector of predicted values. `get_predicted()` usually #' returns confidence intervals (included as attribute, and accessible via the -#' `as.data.frame()` method) by default. +#' `as.data.frame()` method) by default. It is preferred to rely on the +#' `get_predicted()` function for standard errors and confidence intervals - +#' use `get_predicted_ci()` only if standard errors and confidence intervals +#' are not available otherwise. #' #' @param x A statistical model (can also be a data.frame, in which case the #' second argument has to be a model). @@ -734,7 +737,7 @@ get_predicted.phylolm <- function(x, # Transform iterations if ("iterations" %in% names(attributes(predictions))) { - attr(predictions, "iterations") <- as.data.frame(sapply(attributes(predictions)$iterations, link_inv)) + attr(predictions, "iterations") <- as.data.frame(sapply(attributes(predictions)$iterations, link_inv)) # nolint } # Transform to response "type" @@ -744,7 +747,7 @@ get_predicted.phylolm <- function(x, predictions <- .get_predict_transform_response(predictions, response = response) if ("iterations" %in% names(attributes(predictions))) { attr(predictions, "iterations") <- as.data.frame( - sapply( + sapply( # nolint attributes(predictions)$iterations, .get_predict_transform_response, response = response diff --git a/R/get_predicted_ci.R b/R/get_predicted_ci.R index c9be7fd3c..427d60b60 100644 --- a/R/get_predicted_ci.R +++ b/R/get_predicted_ci.R @@ -2,12 +2,39 @@ #' #' @inheritParams get_predicted #' @param predictions A vector of predicted values (as obtained by -#' `stats::fitted()`, `stats::predict()` or -#' [get_predicted()]). +#' `stats::fitted()`, `stats::predict()` or [get_predicted()]). #' @param se Numeric vector of standard error of predicted values. If `NULL`, #' standard errors are calculated based on the variance-covariance matrix. #' @inheritParams get_predicted #' +#' @details +#' Typically, `get_predicted()` returns confidence intervals based on the standard +#' errors as returned by the `predict()`-function, assuming normal distribution +#' (`+/- 1.96 * SE`) resp. a Student's t-distribution (if degrees of freedom are +#' available). If `predict()` for a certain class does _not_ return standard +#' errors (for example, *merMod*-objects), these are calculated manually, based +#' on following steps: matrix-multiply `X` by the parameter vector `B` to get the +#' predictions, then extract the variance-covariance matrix `V` of the parameters +#' and compute `XVX'` to get the variance-covariance matrix of the predictions. +#' The square-root of the diagonal of this matrix represent the standard errors +#' of the predictions, which are then multiplied by the critical test-statistic +#' value (e.g., ~1.96 for normal distribution) for the confidence intervals. +#' +#' If `ci_type = "prediction"`, prediction intervals are calculated. These are +#' wider than confidence intervals, because they also take into account the +#' uncertainty of the model itself. Before taking the square-root of the +#' diagonal of the variance-covariance matrix, `get_predicted_ci()` adds the +#' residual variance to these values. For mixed models, `get_variance_residual()` +#' is used, while `get_sigma()^2` is used for non-mixed models. +#' +#' It is preferred to rely on standard errors returned by `get_predicted()` (i.e. +#' returned by the `predict()`-function), because these are more accurate than +#' manually calculated standard errors. Use `get_predicted_ci()` only if standard +#' errors are not available otherwise. An exception are Bayesian models or +#' bootstrapped predictions, where `get_predicted_ci()` returns quantiles of the +#' posterior distribution or bootstrapped samples of the predictions. These are +#' actually accurate standard errors resp. confidence (or uncertainty) intervals. +#' #' @examplesIf require("boot") && require("datawizard") && require("bayestestR") #' # Confidence Intervals for Model Predictions #' # ------------------------------------------ @@ -274,7 +301,7 @@ get_predicted_ci.bracl <- get_predicted_ci.mlm # for multiple length, SE and predictions may match, could be intended? # could there be any cases where we have twice or x times the length of # predictions as standard errors? - format_warning("Predictions and standard errors are not of the same length. Please check if you need the `data` argument.") + format_warning("Predictions and standard errors are not of the same length. Please check if you need the `data` argument.") # nolint } else { format_error("Predictions and standard errors are not of the same length. Please specify the `data` argument.") } @@ -298,7 +325,7 @@ get_predicted_ci.bracl <- get_predicted_ci.mlm format_error("The `data` argument should be a data frame.") } mm <- get_modelmatrix(x, data = data) - out <- sapply( + out <- sapply( # nolint seq_len(nrow(mm)), function(i) { suppressMessages( lmerTest::contestMD(x, mm[i, , drop = FALSE], ddf = type)[["DenDF"]] diff --git a/man/get_predicted.Rd b/man/get_predicted.Rd index e898accbc..edceadcaf 100644 --- a/man/get_predicted.Rd +++ b/man/get_predicted.Rd @@ -214,11 +214,14 @@ important to read the documentation of the arguments. This is because making with lots of caveats and complications. Read the 'Details' section for more information. -\code{get_predicted_ci()} returns the confidence (or prediction) interval (CI) +\code{\link[=get_predicted_ci]{get_predicted_ci()}} returns the confidence (or prediction) interval (CI) associated with predictions made by a model. This function can be called separately on a vector of predicted values. \code{get_predicted()} usually returns confidence intervals (included as attribute, and accessible via the -\code{as.data.frame()} method) by default. +\code{as.data.frame()} method) by default. It is preferred to rely on the +\code{get_predicted()} function for standard errors and confidence intervals - +use \code{get_predicted_ci()} only if standard errors and confidence intervals +are not available otherwise. } \details{ In \code{insight::get_predicted()}, the \code{predict} argument jointly diff --git a/man/get_predicted_ci.Rd b/man/get_predicted_ci.Rd index 2edb8540e..e1e6c0df2 100644 --- a/man/get_predicted_ci.Rd +++ b/man/get_predicted_ci.Rd @@ -29,8 +29,7 @@ second argument has to be a model).} \item{...}{Other argument to be passed, for instance to \code{get_predicted_ci()}.} \item{predictions}{A vector of predicted values (as obtained by -\code{stats::fitted()}, \code{stats::predict()} or -\code{\link[=get_predicted]{get_predicted()}}).} +\code{stats::fitted()}, \code{stats::predict()} or \code{\link[=get_predicted]{get_predicted()}}).} \item{data}{An optional data frame in which to look for variables with which to predict. If omitted, the data used to fit the model is used. Visualization @@ -106,6 +105,34 @@ equals the default from the {sandwich} package; for type \code{"CR"} (or \description{ Confidence intervals around predicted values } +\details{ +Typically, \code{get_predicted()} returns confidence intervals based on the standard +errors as returned by the \code{predict()}-function, assuming normal distribution +(\verb{+/- 1.96 * SE}) resp. a Student's t-distribution (if degrees of freedom are +available). If \code{predict()} for a certain class does \emph{not} return standard +errors (for example, \emph{merMod}-objects), these are calculated manually, based +on following steps: matrix-multiply \code{X} by the parameter vector \code{B} to get the +predictions, then extract the variance-covariance matrix \code{V} of the parameters +and compute \verb{XVX'} to get the variance-covariance matrix of the predictions. +The square-root of the diagonal of this matrix represent the standard errors +of the predictions, which are then multiplied by the critical test-statistic +value (e.g., ~1.96 for normal distribution) for the confidence intervals. + +If \code{ci_type = "prediction"}, prediction intervals are calculated. These are +wider than confidence intervals, because they also take into account the +uncertainty of the model itself. Before taking the square-root of the +diagonal of the variance-covariance matrix, \code{get_predicted_ci()} adds the +residual variance to these values. For mixed models, \code{get_variance_residual()} +is used, while \code{get_sigma()^2} is used for non-mixed models. + +It is preferred to rely on standard errors returned by \code{get_predicted()} (i.e. +returned by the \code{predict()}-function), because these are more accurate than +manually calculated standard errors. Use \code{get_predicted_ci()} only if standard +errors are not available otherwise. An exception are Bayesian models or +bootstrapped predictions, where \code{get_predicted_ci()} returns quantiles of the +posterior distribution or bootstrapped samples of the predictions. These are +actually accurate standard errors resp. confidence (or uncertainty) intervals. +} \examples{ \dontshow{if (require("boot") && require("datawizard") && require("bayestestR")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} # Confidence Intervals for Model Predictions diff --git a/man/get_transformation.Rd b/man/get_transformation.Rd index 6823e24d4..79aa68a1a 100644 --- a/man/get_transformation.Rd +++ b/man/get_transformation.Rd @@ -33,10 +33,10 @@ model <- lm(log(Sepal.Length) ~ Species, data = iris) get_transformation(model) # log-function -get_transformation(model)$transformation(.3) -log(.3) +get_transformation(model)$transformation(0.3) +log(0.3) # inverse function is exp() -get_transformation(model)$inverse(.3) -exp(.3) +get_transformation(model)$inverse(0.3) +exp(0.3) } diff --git a/tests/testthat/test-find_transformation.R b/tests/testthat/test-find_transformation.R index b5530789f..7f779de22 100644 --- a/tests/testthat/test-find_transformation.R +++ b/tests/testthat/test-find_transformation.R @@ -49,6 +49,7 @@ test_that("find_transformation - strange bayestestR example", { }) test_that("find_transformation - detect powers", { + # styler: off data(iris) m1 <- lm(Sepal.Length^(1 / 2) ~ Species, data = iris) m2 <- lm(Sepal.Length^2 ~ Species, data = iris) @@ -81,4 +82,5 @@ test_that("find_transformation - detect powers", { expect_identical(insight::find_transformation(m4), "power") expect_identical(insight::find_transformation(m5), "power") expect_identical(insight::find_transformation(m6), "power") + # styler: on })