From d8c6e0897965d301f9bd52d75a13321646111ee3 Mon Sep 17 00:00:00 2001
From: Daniel <mail@danielluedecke.de>
Date: Wed, 27 Sep 2023 13:27:56 +0200
Subject: [PATCH] Differences between get_predicted and get_predicted_ci for
 mixed models (#814)

* Differences between get_predicted and get_predicted_ci for mixed models
Fixes #797

* news, version

* docs

* this is intentional, turn styler off

* typos
---
 DESCRIPTION                               |  2 +-
 NEWS.md                                   |  9 ++++++
 R/get_predicted.R                         | 11 ++++---
 R/get_predicted_ci.R                      | 35 ++++++++++++++++++++---
 man/get_predicted.Rd                      |  7 +++--
 man/get_predicted_ci.Rd                   | 31 ++++++++++++++++++--
 man/get_transformation.Rd                 |  8 +++---
 tests/testthat/test-find_transformation.R |  2 ++
 8 files changed, 88 insertions(+), 17 deletions(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index dceb5f902..030e28120 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,7 +1,7 @@
 Type: Package
 Package: insight
 Title: Easy Access to Model Information for Various Model Objects
-Version: 0.19.5.4
+Version: 0.19.5.5
 Authors@R: 
     c(person(given = "Daniel",
              family = "Lüdecke",
diff --git a/NEWS.md b/NEWS.md
index c6b7644fb..dd837ea63 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,9 +1,18 @@
 # insight 0.19.6
 
+## General
+
+* Improved documentation for `get_predicted_ci()`.
+
 ## Changes to functions
 
 * `model_info()` now recognized ordered beta families.
 
+## Bug fixes
+
+* `find_transformation()` better detects power-transformation of the response
+  variable.
+
 # insight 0.19.5
 
 ## Bug fixes
diff --git a/R/get_predicted.R b/R/get_predicted.R
index 830cda5dc..ce69797d1 100644
--- a/R/get_predicted.R
+++ b/R/get_predicted.R
@@ -12,11 +12,14 @@
 #' with lots of caveats and complications. Read the 'Details' section for more
 #' information.
 #'
-#' `get_predicted_ci()` returns the confidence (or prediction) interval (CI)
+#' [`get_predicted_ci()`] returns the confidence (or prediction) interval (CI)
 #' associated with predictions made by a model. This function can be called
 #' separately on a vector of predicted values. `get_predicted()` usually
 #' returns confidence intervals (included as attribute, and accessible via the
-#' `as.data.frame()` method) by default.
+#' `as.data.frame()` method) by default. It is preferred to rely on the
+#' `get_predicted()` function for standard errors and confidence intervals -
+#' use `get_predicted_ci()` only if standard errors and confidence intervals
+#' are not available otherwise.
 #'
 #' @param x A statistical model (can also be a data.frame, in which case the
 #'   second argument has to be a model).
@@ -734,7 +737,7 @@ get_predicted.phylolm <- function(x,
 
     # Transform iterations
     if ("iterations" %in% names(attributes(predictions))) {
-      attr(predictions, "iterations") <- as.data.frame(sapply(attributes(predictions)$iterations, link_inv))
+      attr(predictions, "iterations") <- as.data.frame(sapply(attributes(predictions)$iterations, link_inv)) # nolint
     }
 
     # Transform to response "type"
@@ -744,7 +747,7 @@ get_predicted.phylolm <- function(x,
       predictions <- .get_predict_transform_response(predictions, response = response)
       if ("iterations" %in% names(attributes(predictions))) {
         attr(predictions, "iterations") <- as.data.frame(
-          sapply(
+          sapply( # nolint
             attributes(predictions)$iterations,
             .get_predict_transform_response,
             response = response
diff --git a/R/get_predicted_ci.R b/R/get_predicted_ci.R
index c9be7fd3c..427d60b60 100644
--- a/R/get_predicted_ci.R
+++ b/R/get_predicted_ci.R
@@ -2,12 +2,39 @@
 #'
 #' @inheritParams get_predicted
 #' @param predictions A vector of predicted values (as obtained by
-#'   `stats::fitted()`, `stats::predict()` or
-#'   [get_predicted()]).
+#'   `stats::fitted()`, `stats::predict()` or [get_predicted()]).
 #' @param se Numeric vector of standard error of predicted values. If `NULL`,
 #'   standard errors are calculated based on the variance-covariance matrix.
 #' @inheritParams get_predicted
 #'
+#' @details
+#' Typically, `get_predicted()` returns confidence intervals based on the standard
+#' errors as returned by the `predict()`-function, assuming normal distribution
+#' (`+/- 1.96 * SE`) resp. a Student's t-distribution (if degrees of freedom are
+#' available). If `predict()` for a certain class does _not_ return standard
+#' errors (for example, *merMod*-objects), these are calculated manually, based
+#' on following steps: matrix-multiply `X` by the parameter vector `B` to get the
+#' predictions, then extract the variance-covariance matrix `V` of the parameters
+#' and compute `XVX'` to get the variance-covariance matrix of the predictions.
+#' The square-root of the diagonal of this matrix represent the standard errors
+#' of the predictions, which are then multiplied by the critical test-statistic
+#' value (e.g., ~1.96 for normal distribution) for the confidence intervals.
+#'
+#' If `ci_type = "prediction"`, prediction intervals are calculated. These are
+#' wider than confidence intervals, because they also take into account the
+#' uncertainty of the model itself. Before taking the square-root of the
+#' diagonal of the variance-covariance matrix, `get_predicted_ci()` adds the
+#' residual variance to these values. For mixed models, `get_variance_residual()`
+#' is used, while `get_sigma()^2` is used for non-mixed models.
+#'
+#' It is preferred to rely on standard errors returned by `get_predicted()` (i.e.
+#' returned by the `predict()`-function), because these are more accurate than
+#' manually calculated standard errors. Use `get_predicted_ci()` only if standard
+#' errors are not available otherwise. An exception are Bayesian models or
+#' bootstrapped predictions, where `get_predicted_ci()` returns quantiles of the
+#' posterior distribution or bootstrapped samples of the predictions. These are
+#' actually accurate standard errors resp. confidence (or uncertainty) intervals.
+#'
 #' @examplesIf require("boot") && require("datawizard") && require("bayestestR")
 #' # Confidence Intervals for Model Predictions
 #' # ------------------------------------------
@@ -274,7 +301,7 @@ get_predicted_ci.bracl <- get_predicted_ci.mlm
         # for multiple length, SE and predictions may match, could be intended?
         # could there be any cases where we have twice or x times the length of
         # predictions as standard errors?
-        format_warning("Predictions and standard errors are not of the same length. Please check if you need the `data` argument.")
+        format_warning("Predictions and standard errors are not of the same length. Please check if you need the `data` argument.") # nolint
       } else {
         format_error("Predictions and standard errors are not of the same length. Please specify the `data` argument.")
       }
@@ -298,7 +325,7 @@ get_predicted_ci.bracl <- get_predicted_ci.mlm
     format_error("The `data` argument should be a data frame.")
   }
   mm <- get_modelmatrix(x, data = data)
-  out <- sapply(
+  out <- sapply( # nolint
     seq_len(nrow(mm)), function(i) {
       suppressMessages(
         lmerTest::contestMD(x, mm[i, , drop = FALSE], ddf = type)[["DenDF"]]
diff --git a/man/get_predicted.Rd b/man/get_predicted.Rd
index e898accbc..edceadcaf 100644
--- a/man/get_predicted.Rd
+++ b/man/get_predicted.Rd
@@ -214,11 +214,14 @@ important to read the documentation of the arguments. This is because making
 with lots of caveats and complications. Read the 'Details' section for more
 information.
 
-\code{get_predicted_ci()} returns the confidence (or prediction) interval (CI)
+\code{\link[=get_predicted_ci]{get_predicted_ci()}} returns the confidence (or prediction) interval (CI)
 associated with predictions made by a model. This function can be called
 separately on a vector of predicted values. \code{get_predicted()} usually
 returns confidence intervals (included as attribute, and accessible via the
-\code{as.data.frame()} method) by default.
+\code{as.data.frame()} method) by default. It is preferred to rely on the
+\code{get_predicted()} function for standard errors and confidence intervals -
+use \code{get_predicted_ci()} only if standard errors and confidence intervals
+are not available otherwise.
 }
 \details{
 In \code{insight::get_predicted()}, the \code{predict} argument jointly
diff --git a/man/get_predicted_ci.Rd b/man/get_predicted_ci.Rd
index 2edb8540e..e1e6c0df2 100644
--- a/man/get_predicted_ci.Rd
+++ b/man/get_predicted_ci.Rd
@@ -29,8 +29,7 @@ second argument has to be a model).}
 \item{...}{Other argument to be passed, for instance to \code{get_predicted_ci()}.}
 
 \item{predictions}{A vector of predicted values (as obtained by
-\code{stats::fitted()}, \code{stats::predict()} or
-\code{\link[=get_predicted]{get_predicted()}}).}
+\code{stats::fitted()}, \code{stats::predict()} or \code{\link[=get_predicted]{get_predicted()}}).}
 
 \item{data}{An optional data frame in which to look for variables with which
 to predict. If omitted, the data used to fit the model is used. Visualization
@@ -106,6 +105,34 @@ equals the default from the {sandwich} package; for type \code{"CR"} (or
 \description{
 Confidence intervals around predicted values
 }
+\details{
+Typically, \code{get_predicted()} returns confidence intervals based on the standard
+errors as returned by the \code{predict()}-function, assuming normal distribution
+(\verb{+/- 1.96 * SE}) resp. a Student's t-distribution (if degrees of freedom are
+available). If \code{predict()} for a certain class does \emph{not} return standard
+errors (for example, \emph{merMod}-objects), these are calculated manually, based
+on following steps: matrix-multiply \code{X} by the parameter vector \code{B} to get the
+predictions, then extract the variance-covariance matrix \code{V} of the parameters
+and compute \verb{XVX'} to get the variance-covariance matrix of the predictions.
+The square-root of the diagonal of this matrix represent the standard errors
+of the predictions, which are then multiplied by the critical test-statistic
+value (e.g., ~1.96 for normal distribution) for the confidence intervals.
+
+If \code{ci_type = "prediction"}, prediction intervals are calculated. These are
+wider than confidence intervals, because they also take into account the
+uncertainty of the model itself. Before taking the square-root of the
+diagonal of the variance-covariance matrix, \code{get_predicted_ci()} adds the
+residual variance to these values. For mixed models, \code{get_variance_residual()}
+is used, while \code{get_sigma()^2} is used for non-mixed models.
+
+It is preferred to rely on standard errors returned by \code{get_predicted()} (i.e.
+returned by the \code{predict()}-function), because these are more accurate than
+manually calculated standard errors. Use \code{get_predicted_ci()} only if standard
+errors are not available otherwise. An exception are Bayesian models or
+bootstrapped predictions, where \code{get_predicted_ci()} returns quantiles of the
+posterior distribution or bootstrapped samples of the predictions. These are
+actually accurate standard errors resp. confidence (or uncertainty) intervals.
+}
 \examples{
 \dontshow{if (require("boot") && require("datawizard") && require("bayestestR")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
 # Confidence Intervals for Model Predictions
diff --git a/man/get_transformation.Rd b/man/get_transformation.Rd
index 6823e24d4..79aa68a1a 100644
--- a/man/get_transformation.Rd
+++ b/man/get_transformation.Rd
@@ -33,10 +33,10 @@ model <- lm(log(Sepal.Length) ~ Species, data = iris)
 get_transformation(model)
 
 # log-function
-get_transformation(model)$transformation(.3)
-log(.3)
+get_transformation(model)$transformation(0.3)
+log(0.3)
 
 # inverse function is exp()
-get_transformation(model)$inverse(.3)
-exp(.3)
+get_transformation(model)$inverse(0.3)
+exp(0.3)
 }
diff --git a/tests/testthat/test-find_transformation.R b/tests/testthat/test-find_transformation.R
index b5530789f..7f779de22 100644
--- a/tests/testthat/test-find_transformation.R
+++ b/tests/testthat/test-find_transformation.R
@@ -49,6 +49,7 @@ test_that("find_transformation - strange bayestestR example", {
 })
 
 test_that("find_transformation - detect powers", {
+  # styler: off
   data(iris)
   m1 <- lm(Sepal.Length^(1 / 2) ~ Species, data = iris)
   m2 <- lm(Sepal.Length^2 ~ Species, data = iris)
@@ -81,4 +82,5 @@ test_that("find_transformation - detect powers", {
   expect_identical(insight::find_transformation(m4), "power")
   expect_identical(insight::find_transformation(m5), "power")
   expect_identical(insight::find_transformation(m6), "power")
+  # styler: on
 })