diff --git a/R/compute_variances.R b/R/compute_variances.R index 560e430c6..8dca1ddf2 100644 --- a/R/compute_variances.R +++ b/R/compute_variances.R @@ -198,8 +198,6 @@ - - # store essential information on coefficients, model matrix and so on # as list, since we need these information throughout the functions to # calculate the variance components... @@ -435,9 +433,6 @@ } - - - # helper-function, telling user if family / distribution # is supported or not .badlink <- function(link, family, verbose = TRUE) { @@ -450,9 +445,6 @@ } - - - # glmmTMB returns a list of model information, one for conditional # and one for zero-inflated part, so here we "unlist" it, returning # only the conditional part. @@ -476,8 +468,6 @@ - - # fixed effects variance ------------------------------------------------------ # # This is in line with Nakagawa et al. 2017, Suppl. 2 @@ -492,6 +482,18 @@ +# dispersion-specific variance ---- +# --------------------------------- +.compute_variance_dispersion <- function(x, vals, faminfo, obs.terms) { + if (faminfo$is_linear) { + 0 + } else if (length(obs.terms) == 0) { + 0 + } else { + .compute_variance_random(obs.terms, x = x, vals = vals) + } +} + # variance associated with a random-effects term (Johnson 2014) -------------- @@ -534,13 +536,16 @@ - - # distribution-specific (residual) variance (Nakagawa et al. 2017) ------------ +# (also call obersvation level variance). # # This is in line with Nakagawa et al. 2017, Suppl. 2, and package MuMIm # see https://royalsocietypublishing.org/action/downloadSupplement?doi=10.1098%2Frsif.2017.0213&file=rsif20170213supp2.pdf # +# We need: +# - the overdispersion parameter / sigma +# - the null model for the conditional mean (for the observation level variance) +# # There may be small deviations to Nakagawa et al. for the null-model, which # despite being correctly re-formulated in "null_model()", returns slightly # different values for the log/delta/trigamma approximation. @@ -553,6 +558,7 @@ name, approx_method = "lognormal", verbose = TRUE) { + # get overdispersion parameter / sigma sig <- .safe(get_sigma(x)) if (is.null(sig)) { @@ -599,7 +605,7 @@ # sanity check - clmm-models are "binomial" but have no pmean if (is.null(pmean) && identical(approx_method, "observation_level")) { - approx_method <- "lognormal" + approx_method <- "lognormal" # we don't have lognormal, it's just the default } resid.variance <- switch(faminfo$link_function, @@ -736,27 +742,9 @@ } - - -# dispersion-specific variance ---- -# --------------------------------- -.compute_variance_dispersion <- function(x, vals, faminfo, obs.terms) { - if (faminfo$is_linear) { - 0 - } else if (length(obs.terms) == 0) { - 0 - } else { - .compute_variance_random(obs.terms, x = x, vals = vals) - } -} - - - - - # This is the core-function to calculate the distribution-specific variance -# Nakagawa et al. 2017 propose three different methods, which are now also -# implemented here. +# (also call obersvation level variance). Nakagawa et al. 2017 propose three +# different methods, which are now also implemented here. # # This is in line with Nakagawa et al. 2017, Suppl. 2, and package MuMIm # see https://royalsocietypublishing.org/action/downloadSupplement?doi=10.1098%2Frsif.2017.0213&file=rsif20170213supp2.pdf @@ -782,8 +770,10 @@ # ------------------------------------------------------------------ # approximation of distributional variance, see Nakagawa et al. 2017 - # in general want somethinh lije log(1+var(x)/mu^2) for log-approximation, - # and for the other approximations accordingly + # in general want something like log(1+var(x)/mu^2) for log-approximation, + # and for the other approximations accordingly. Therefore, we need + # "mu" (the conditional mean of the null model - that's why we need + # the null model here) and the overdispersion / sigma parameter # ------------------------------------------------------------------ # check if null-model could be computed @@ -791,8 +781,7 @@ mu <- NA } else { if (inherits(model_null, "cpglmm")) { - # installed? - check_if_installed("cplm") + check_if_installed("cplm") # installed? null_fixef <- unname(cplm::fixef(model_null)) } else { null_fixef <- unname(.collapse_cond(lme4::fixef(model_null))) @@ -823,11 +812,13 @@ beta = , betabinomial = , ordbeta = stats::plogis(mu), + # for count models, Nakagawa et al. suggest this transformation poisson = , quasipoisson = , nbinom = , nbinom1 = , nbinom2 = , + negbinomial = , tweedie = , `negative binomial` = exp(mu + 0.5 * as.vector(revar_null)), link_inverse(x)(mu) ## TODO: check if this is better than "exp(mu)" @@ -838,9 +829,9 @@ cvsquared <- tryCatch( { if (faminfo$link_function == "tweedie") { - vv <- .variance_family_tweedie(x, mu, sig) + dispersion_param <- .variance_family_tweedie(x, mu, sig) } else { - vv <- switch(faminfo$family, + dispersion_param <- switch(faminfo$family, # (zero-inflated) poisson ---- # ---------------------------- @@ -862,6 +853,7 @@ nbinom1 = , nbinom2 = , quasipoisson = , + negbinomial = , `negative binomial` = sig, `zero-inflated negative binomial` = , genpois = .variance_family_nbinom(x, mu, sig, faminfo), @@ -882,7 +874,7 @@ ) } - if (vv < 0 && isTRUE(verbose)) { + if (dispersion_param < 0 && isTRUE(verbose)) { format_warning( "Model's distribution-specific variance is negative. Results are not reliable." ) @@ -893,26 +885,28 @@ # for cpglmm with tweedie link, the model is not of tweedie family, # only the link function is tweedie if (faminfo$link_function == "tweedie") { - vv + dispersion_param } else if (identical(approx_method, "trigamma")) { switch(faminfo$family, nbinom = , nbinom1 = , nbinom2 = , - `negative binomial` = ((1 / mu) + (1 / sig))^-1, + negbinomial = , + `negative binomial` = ((1 / mu) + (1 / dispersion_param))^-1, poisson = , - quasipoisson = mu / vv, - vv / mu^2 + quasipoisson = mu / dispersion_param, + dispersion_param / mu^2 ) } else { switch(faminfo$family, nbinom = , nbinom1 = , nbinom2 = , - `negative binomial` = (1 / mu) + (1 / sig), + negbinomial = , + `negative binomial` = (1 / mu) + (1 / dispersion_param), poisson = , - quasipoisson = vv / mu, - vv / mu^2 + quasipoisson = dispersion_param / mu, + dispersion_param / mu^2 ) } }, diff --git a/R/get_variances.R b/R/get_variances.R index 3696c7dbd..93e6493b2 100644 --- a/R/get_variances.R +++ b/R/get_variances.R @@ -22,10 +22,11 @@ #' @param null_model Optional, a null-model to be used for the calculation of #' random effect variances. If `NULL`, the null-model is computed internally. #' @param approximation Character string, indicating the approximation method -#' for the distribution-specific (residual) variance. Only applies to non-Gaussian -#' models. Can be `"lognormal"` (default), `"delta"` or `"trigamma"`. For binomial -#' models, can also be `"observation_level"`. See _Nakagawa et al. 2017_ for -#' details. +#' for the distribution-specific (observation level, or residual) variance. Only +#' applies to non-Gaussian models. Can be `"lognormal"` (default), `"delta"` or +#' `"trigamma"`. For binomial models, the default is the _theoretical_ distribution +#' specific variance, however, it can also be `"observation_level"`. See +#' _Nakagawa et al. 2017_, in particular supplement 2, for details. #' @param model_component For models that can have a zero-inflation component, #' specify for which component variances should be returned. #' @param ... Currently not used. @@ -34,8 +35,8 @@ #' #' - `var.fixed`, variance attributable to the fixed effects #' - `var.random`, (mean) variance of random effects -#' - `var.residual`, residual variance (sum of dispersion and distribution) -#' - `var.distribution`, distribution-specific variance +#' - `var.residual`, residual variance (sum of dispersion and distribution-specific/observation level variance) +#' - `var.distribution`, distribution-specific (or observation level) variance #' - `var.dispersion`, variance due to additive dispersion #' - `var.intercept`, the random-intercept-variance, or between-subject-variance (\ifelse{html}{\out{τ00}}{\eqn{\tau_{00}}}) #' - `var.slope`, the random-slope-variance (\ifelse{html}{\out{τ11}}{\eqn{\tau_{11}}}) @@ -60,7 +61,7 @@ #' _Johnson 2014_, in particular equation 10. For simple random-intercept models, #' the random effects variance equals the random-intercept variance. #' -#' @section Distribution-specific variance: +#' @section Distribution-specific (observation level) variance: #' The distribution-specific variance, #' \ifelse{html}{\out{σ2d}}{\eqn{\sigma^2_d}}, #' depends on the model family. For Gaussian models, it is @@ -69,9 +70,11 @@ #' \eqn{\pi^2 / 3} for logit-link, `1` for probit-link, and \eqn{\pi^2 / 6} #' for cloglog-links. Models from Gamma-families use \eqn{\mu^2} (as obtained #' from `family$variance()`). For all other models, the distribution-specific -#' variance is based on lognormal approximation, \eqn{log(1 + var(x) / \mu^2)} -#' (see \cite{Nakagawa et al. 2017}). The expected variance of a zero-inflated -#' model is computed according to _Zuur et al. 2012, p277_. +#' variance is by default based on lognormal approximation, +#' \eqn{log(1 + var(x) / \mu^2)} (see \cite{Nakagawa et al. 2017}). Other +#' approximation methods can be specified with the `approximation` argument. +#' The expected variance of a zero-inflated model is computed according to +#' _Zuur et al. 2012, p277_. #' #' @section Variance for the additive overdispersion term: #' The variance for the additive overdispersion term, diff --git a/man/get_variance.Rd b/man/get_variance.Rd index 060964885..afa4c6d65 100644 --- a/man/get_variance.Rd +++ b/man/get_variance.Rd @@ -78,10 +78,11 @@ stricter the test will be. See \code{\link[performance:check_singularity]{perfor random effect variances. If \code{NULL}, the null-model is computed internally.} \item{approximation}{Character string, indicating the approximation method -for the distribution-specific (residual) variance. Only applies to non-Gaussian -models. Can be \code{"lognormal"} (default), \code{"delta"} or \code{"trigamma"}. For binomial -models, can also be \code{"observation_level"}. See \emph{Nakagawa et al. 2017} for -details.} +for the distribution-specific (observation level, or residual) variance. Only +applies to non-Gaussian models. Can be \code{"lognormal"} (default), \code{"delta"} or +\code{"trigamma"}. For binomial models, the default is the \emph{theoretical} distribution +specific variance, however, it can also be \code{"observation_level"}. See +\emph{Nakagawa et al. 2017}, in particular supplement 2, for details.} \item{verbose}{Toggle off warnings.} @@ -93,8 +94,8 @@ A list with following elements: \itemize{ \item \code{var.fixed}, variance attributable to the fixed effects \item \code{var.random}, (mean) variance of random effects -\item \code{var.residual}, residual variance (sum of dispersion and distribution) -\item \code{var.distribution}, distribution-specific variance +\item \code{var.residual}, residual variance (sum of dispersion and distribution-specific/observation level variance) +\item \code{var.distribution}, distribution-specific (or observation level) variance \item \code{var.dispersion}, variance due to additive dispersion \item \code{var.intercept}, the random-intercept-variance, or between-subject-variance (\ifelse{html}{\out{τ00}}{\eqn{\tau_{00}}}) \item \code{var.slope}, the random-slope-variance (\ifelse{html}{\out{τ11}}{\eqn{\tau_{11}}}) @@ -137,7 +138,7 @@ like random slopes or nested random effects. Details can be found in the random effects variance equals the random-intercept variance. } -\section{Distribution-specific variance}{ +\section{Distribution-specific (observation level) variance}{ The distribution-specific variance, \ifelse{html}{\out{σ2d}}{\eqn{\sigma^2_d}}, @@ -147,9 +148,11 @@ For models with binary outcome, it is \eqn{\pi^2 / 3} for logit-link, \code{1} for probit-link, and \eqn{\pi^2 / 6} for cloglog-links. Models from Gamma-families use \eqn{\mu^2} (as obtained from \code{family$variance()}). For all other models, the distribution-specific -variance is based on lognormal approximation, \eqn{log(1 + var(x) / \mu^2)} -(see \cite{Nakagawa et al. 2017}). The expected variance of a zero-inflated -model is computed according to \emph{Zuur et al. 2012, p277}. +variance is by default based on lognormal approximation, +\eqn{log(1 + var(x) / \mu^2)} (see \cite{Nakagawa et al. 2017}). Other +approximation methods can be specified with the \code{approximation} argument. +The expected variance of a zero-inflated model is computed according to +\emph{Zuur et al. 2012, p277}. } \section{Variance for the additive overdispersion term}{