From ba4e61a05e569853acf081df19dbefea537293ea Mon Sep 17 00:00:00 2001 From: Daniel Date: Mon, 17 Jun 2024 10:51:32 +0200 Subject: [PATCH] [Proposal] Adding support for pooled (multiple imputed) glmtoolbox::glmgee Fixes #894 --- DESCRIPTION | 3 +- NAMESPACE | 9 +++ NEWS.md | 4 ++ R/find_formula.R | 3 + R/find_parameters_other.R | 28 ++++++++ R/find_statistic.R | 2 +- R/get_data.R | 3 + R/get_df.R | 2 +- R/get_parameters_gam.R | 18 +++--- R/get_parameters_others.R | 49 +++++++++++--- R/get_statistic.R | 20 ++++++ R/helper_functions.R | 3 +- R/is_model.R | 4 +- R/link_function.R | 3 + R/link_inverse.R | 3 + R/model_info.R | 13 ++++ R/n_obs.R | 6 ++ R/utils_model_info.R | 2 +- man/find_parameters.averaging.Rd | 8 +++ man/get_parameters.betareg.Rd | 3 + tests/testthat/test-glmgee.R | 107 +++++++++++++++++++++++++++++++ 21 files changed, 267 insertions(+), 26 deletions(-) create mode 100644 tests/testthat/test-glmgee.R diff --git a/DESCRIPTION b/DESCRIPTION index 8d607b1be..6b8f80dce 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Type: Package Package: insight Title: Easy Access to Model Information for Various Model Objects -Version: 0.20.1.4 +Version: 0.20.1.5 Authors@R: c(person(given = "Daniel", family = "Lüdecke", @@ -126,6 +126,7 @@ Suggests: ggeffects, GLMMadaptive, glmmTMB, + glmtoolbox, gmnl, grDevices, gt, diff --git a/NAMESPACE b/NAMESPACE index 1be3e3f68..b674ea108 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -121,6 +121,7 @@ S3method(find_formula,gamm) S3method(find_formula,gee) S3method(find_formula,glht) S3method(find_formula,glimML) +S3method(find_formula,glmgee) S3method(find_formula,glmm) S3method(find_formula,glmmPQL) S3method(find_formula,glmmTMB) @@ -233,6 +234,7 @@ S3method(find_parameters,gamm) S3method(find_parameters,gbm) S3method(find_parameters,glht) S3method(find_parameters,glimML) +S3method(find_parameters,glmgee) S3method(find_parameters,glmm) S3method(find_parameters,glmmTMB) S3method(find_parameters,glmmadmb) @@ -406,6 +408,7 @@ S3method(get_data,gee) S3method(get_data,geeglm) S3method(get_data,glht) S3method(get_data,glimML) +S3method(get_data,glmgee) S3method(get_data,glmm) S3method(get_data,glmmTMB) S3method(get_data,glmmadmb) @@ -621,6 +624,7 @@ S3method(get_parameters,gbm) S3method(get_parameters,ggcomparisons) S3method(get_parameters,glht) S3method(get_parameters,glimML) +S3method(get_parameters,glmgee) S3method(get_parameters,glmm) S3method(get_parameters,glmmTMB) S3method(get_parameters,glmmadmb) @@ -830,6 +834,7 @@ S3method(get_statistic,geeglm) S3method(get_statistic,ggcomparisons) S3method(get_statistic,glht) S3method(get_statistic,glimML) +S3method(get_statistic,glmgee) S3method(get_statistic,glmm) S3method(get_statistic,glmmTMB) S3method(get_statistic,glmmadmb) @@ -1076,6 +1081,7 @@ S3method(link_function,gamm) S3method(link_function,gbm) S3method(link_function,glimML) S3method(link_function,glm) +S3method(link_function,glmgee) S3method(link_function,glmm) S3method(link_function,glmmadmb) S3method(link_function,glmx) @@ -1198,6 +1204,7 @@ S3method(link_inverse,gamm) S3method(link_inverse,gbm) S3method(link_inverse,glimML) S3method(link_inverse,glm) +S3method(link_inverse,glmgee) S3method(link_inverse,glmm) S3method(link_inverse,glmmPQL) S3method(link_inverse,glmmTMB) @@ -1335,6 +1342,7 @@ S3method(model_info,gbm) S3method(model_info,ggcomparisons) S3method(model_info,glht) S3method(model_info,glimML) +S3method(model_info,glmgee) S3method(model_info,glmm) S3method(model_info,glmmPQL) S3method(model_info,glmmTMB) @@ -1475,6 +1483,7 @@ S3method(n_obs,gbm) S3method(n_obs,glimML) S3method(n_obs,glm) S3method(n_obs,glmRob) +S3method(n_obs,glmgee) S3method(n_obs,gmnl) S3method(n_obs,hurdle) S3method(n_obs,ivFixed) diff --git a/NEWS.md b/NEWS.md index ea42019be..afdee4658 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,9 @@ # insight 0.20.2 +## New supported models + +* Support for models of class `glmgee` (package *glmtoolbox*). + ## General * Massive overhaul of `get_variance()`. The function should be now more diff --git a/R/find_formula.R b/R/find_formula.R index c7170a0c6..58347ba02 100644 --- a/R/find_formula.R +++ b/R/find_formula.R @@ -574,6 +574,9 @@ find_formula.gee <- function(x, verbose = TRUE, ...) { .find_formula_return(f, verbose = verbose) } +#' @export +find_formula.glmgee <- find_formula.gee + #' @export find_formula.MANOVA <- function(x, verbose = TRUE, ...) { diff --git a/R/find_parameters_other.R b/R/find_parameters_other.R index ed2c13954..91f25c48c 100644 --- a/R/find_parameters_other.R +++ b/R/find_parameters_other.R @@ -36,6 +36,34 @@ find_parameters.averaging <- function(x, } +#' @rdname find_parameters.averaging +#' @export +find_parameters.glmgee <- function(x, + component = c("all", "conditional", "dispersion"), + flatten = FALSE, + ...) { + component <- match.arg(component) + + junk <- utils::capture.output({ + cs <- suppressWarnings(stats::coef(summary(x, corr = FALSE))) + }) + params <- compact_character(rownames(cs)) + + out <- list( + conditional = text_remove_backticks(params[params != "Dispersion"]), + dispersion = text_remove_backticks(params[params == "Dispersion"]) + ) + + .filter_parameters( + out, + effects = "all", + component = component, + flatten = flatten, + recursive = FALSE + ) +} + + #' @rdname find_parameters.averaging #' @export find_parameters.betareg <- function(x, diff --git a/R/find_statistic.R b/R/find_statistic.R index 62c03534a..a09bcfbf0 100644 --- a/R/find_statistic.R +++ b/R/find_statistic.R @@ -121,7 +121,7 @@ find_statistic <- function(x, ...) { "ergm", "feglm", "flexsurvreg", "gee", "ggcomparisons", "glimML", "glmm", "glmmadmb", "glmmFit", "glmmLasso", - "glmmTMB", "glmx", "gmnl", + "glmmTMB", "glmx", "gmnl", "glmgee", "hurdle", "lavaan", "loggammacenslmrob", "logitmfx", "logitor", "logitr", "LORgee", "lrm", "margins", "marginaleffects", "marginaleffects.summary", "metaplus", "mixor", diff --git a/R/get_data.R b/R/get_data.R index 06b266ae7..2865b01a7 100644 --- a/R/get_data.R +++ b/R/get_data.R @@ -425,6 +425,9 @@ get_data.geeglm <- function(x, .prepare_get_data(x, mf, effects = effects, verbose = verbose) } +#' @export +get_data.glmgee <- get_data.geeglm + #' @export get_data.gee <- function(x, diff --git a/R/get_df.R b/R/get_df.R index ef252f0b2..e41402a46 100644 --- a/R/get_df.R +++ b/R/get_df.R @@ -120,7 +120,7 @@ get_df.default <- function(x, type = "residual", verbose = TRUE, ...) { } - if (type == "normal") { + if (type == "normal") { # nolint # Wald normal approximation - always Inf ----- return(Inf) } else if (type == "residual") { diff --git a/R/get_parameters_gam.R b/R/get_parameters_gam.R index 3319c4123..f700d11b0 100644 --- a/R/get_parameters_gam.R +++ b/R/get_parameters_gam.R @@ -170,7 +170,9 @@ get_parameters.SemiParBIV <- function(x, ...) { .return_smooth_parms <- function(conditional, smooth_terms, component) { - if (!is_empty_object(conditional)) { + if (is_empty_object(conditional)) { + cond <- NULL + } else { cond <- data.frame( Parameter = names(conditional), Estimate = conditional, @@ -178,27 +180,25 @@ get_parameters.SemiParBIV <- function(x, ...) { stringsAsFactors = FALSE, row.names = NULL ) - } else { - cond <- NULL } - if (!is_empty_object(smooth_terms)) { - smooth <- data.frame( + if (is_empty_object(smooth_terms)) { + smooth_pars <- NULL + } else { + smooth_pars <- data.frame( Parameter = names(smooth_terms), Estimate = smooth_terms, Component = "smooth_terms", stringsAsFactors = FALSE, row.names = NULL ) - } else { - smooth <- NULL } pars <- switch(component, all = , - location = rbind(cond, smooth), + location = rbind(cond, smooth_pars), conditional = cond, - smooth_terms = smooth + smooth_terms = smooth_pars ) if (!component %in% c("all", "location")) { diff --git a/R/get_parameters_others.R b/R/get_parameters_others.R index 1b536a65b..feb4e4d79 100644 --- a/R/get_parameters_others.R +++ b/R/get_parameters_others.R @@ -38,6 +38,35 @@ get_parameters.betareg <- function(x, } +#' @rdname get_parameters.betareg +#' @export +get_parameters.glmgee <- function(x, component = c("all", "conditional", "dispersion"), ...) { + component <- match.arg(component) + + junk <- utils::capture.output({ + cs <- suppressWarnings(stats::coef(summary(x, corr = FALSE))) + }) + est <- stats::na.omit(cs[, "Estimate"]) + + out <- data.frame( + Parameter = names(est), + Estimate = as.vector(est), + Component = "conditional", + stringsAsFactors = FALSE, + row.names = NULL + ) + + # mark dispersion parameter + out$Component[out$Parameter == "Dispersion"] <- "dispersion" + + if (component != "all") { + out <- out[out$Component == component, , drop = FALSE] + } + + text_remove_backticks(out) +} + + #' @export get_parameters.nestedLogit <- function(x, component = "all", verbose = TRUE, ...) { cf <- as.data.frame(stats::coef(x)) @@ -204,22 +233,22 @@ get_parameters.mvord <- function(x, thresholds$Parameter <- rownames(thresholds) thresholds$Response <- gsub("(.*)\\s(.*)", "\\1", thresholds$Parameter) # coefficients - coefficients <- as.data.frame(s$coefficients) - coefficients$Parameter <- rownames(coefficients) - coefficients$Response <- gsub("(.*)\\s(.*)", "\\2", coefficients$Parameter) + model_coef <- as.data.frame(s$coefficients) + model_coef$Parameter <- rownames(model_coef) + model_coef$Response <- gsub("(.*)\\s(.*)", "\\2", model_coef$Parameter) - if (!all(coefficients$Response %in% thresholds$Response)) { + if (!all(model_coef$Response %in% thresholds$Response)) { resp <- unique(thresholds$Response) - for (i in coefficients$Response) { - coefficients$Response[coefficients$Response == i] <- resp[grepl(paste0(i, "$"), resp)] + for (i in model_coef$Response) { + model_coef$Response[model_coef$Response == i] <- resp[grepl(paste0(i, "$"), resp)] } } params <- data.frame( - Parameter = c(thresholds$Parameter, coefficients$Parameter), - Estimate = c(unname(thresholds[, "Estimate"]), unname(coefficients[, "Estimate"])), - Component = c(rep("thresholds", nrow(thresholds)), rep("conditional", nrow(coefficients))), - Response = c(thresholds$Response, coefficients$Response), + Parameter = c(thresholds$Parameter, model_coef$Parameter), + Estimate = c(unname(thresholds[, "Estimate"]), unname(model_coef[, "Estimate"])), + Component = c(rep("thresholds", nrow(thresholds)), rep("conditional", nrow(model_coef))), + Response = c(thresholds$Response, model_coef$Response), stringsAsFactors = FALSE, row.names = NULL ) diff --git a/R/get_statistic.R b/R/get_statistic.R index 431d3f8d9..8224b4af3 100644 --- a/R/get_statistic.R +++ b/R/get_statistic.R @@ -1128,6 +1128,26 @@ get_statistic.negbinirr <- get_statistic.logitor # Other models ------------------------------------------------------- +#' @export +get_statistic.glmgee <- function(x, ...) { + junk <- utils::capture.output({ + cs <- suppressWarnings(stats::coef(summary(x, corr = FALSE))) + }) + stat <- stats::na.omit(cs[, "z-value"]) + + out <- data.frame( + Parameter = names(stat), + Statistic = as.vector(stat), + stringsAsFactors = FALSE, + row.names = NULL + ) + + out <- text_remove_backticks(out) + attr(out, "statistic") <- find_statistic(x) + out +} + + #' @export get_statistic.nestedLogit <- function(x, component = "all", verbose = TRUE, ...) { cf <- as.data.frame(stats::coef(x)) diff --git a/R/helper_functions.R b/R/helper_functions.R index 358bb954d..88b9509f1 100644 --- a/R/helper_functions.R +++ b/R/helper_functions.R @@ -125,7 +125,8 @@ model, c( "MCMCglmm", "gee", "LORgee", "mixor", "clmm2", "felm", "feis", "bife", - "BFBayesFactor", "BBmm", "glimML", "MANOVA", "RM", "cglm", "glmm" + "BFBayesFactor", "BBmm", "glimML", "MANOVA", "RM", "cglm", "glmm", + "glmgee" ) ) diff --git a/R/is_model.R b/R/is_model.R index 078ce5dfc..5e9d651b9 100644 --- a/R/is_model.R +++ b/R/is_model.R @@ -86,7 +86,7 @@ is_regression_model <- function(x) { # g -------------------- "gam", "Gam", "GAMBoost", "gamlr", "gamlss", "gamm", "gamm4", "garch", "gbm", "gee", "geeglm", "gjrm", "glht", "glimML", "Glm", "glm", - "glmaag", "glmbb", "glmboostLSS", "glmc", "glmdm", "glmdisc", + "glmaag", "glmbb", "glmboostLSS", "glmc", "glmdm", "glmdisc", "glmgee", "glmerMod", "glmlep", "glmm", "glmmadmb", "glmmEP", "glmmFit", "glmmfields", "glmmLasso", "glmmPQL", "glmmTMB", "glmnet", "glmrob", "glmRob", "glmx", "gls", "gmnl", "gmm", "gnls", "gsm", "ggcomparisons", @@ -166,7 +166,7 @@ is_regression_model <- function(x) { if (isTRUE(regression_only)) { out <- setdiff(out, c( "emmGrid", "emm_list", "htest", "pairwise.htest", "summary.lm", - "marginaleffects", "marginaleffects.summary" + "marginaleffects", "marginaleffects.summary", "ggcomparisons" )) } diff --git a/R/link_function.R b/R/link_function.R index 32e0176d4..49c15be34 100644 --- a/R/link_function.R +++ b/R/link_function.R @@ -512,6 +512,9 @@ link_function.bife <- function(x, ...) { x$family$linkfun } +#' @export +link_function.glmgee <- link_function.bife + #' @export link_function.cpglmm <- function(x, ...) { diff --git a/R/link_inverse.R b/R/link_inverse.R index eab0f4987..5237bcabc 100644 --- a/R/link_inverse.R +++ b/R/link_inverse.R @@ -479,6 +479,9 @@ link_inverse.bife <- function(x, ...) { x$family$linkinv } +#' @export +link_inverse.glmgee <- link_inverse.bife + #' @export link_inverse.glmmadmb <- function(x, ...) { diff --git a/R/model_info.R b/R/model_info.R index 2b09840c7..761a131fe 100644 --- a/R/model_info.R +++ b/R/model_info.R @@ -1001,6 +1001,19 @@ model_info.glmmadmb <- function(x, ...) { } +#' @export +model_info.glmgee <- function(x, ...) { + faminfo <- x$family + .make_family( + x = x, + fitfam = faminfo$family, + logit.link = faminfo$link == "logit", + link.fun = faminfo$link, + ... + ) +} + + #' @export model_info.cpglmm <- function(x, ...) { link <- parse(text = safe_deparse(x@call))[[1]]$link diff --git a/R/n_obs.R b/R/n_obs.R index 3461b5a0d..a0efe3de6 100644 --- a/R/n_obs.R +++ b/R/n_obs.R @@ -620,6 +620,12 @@ n_obs.wbgee <- function(x, ...) { } +#' @export +n_obs.glmgee <- function(x, ...) { + length(x$fitted.values) +} + + #' @export n_obs.Rchoice <- function(x, ...) { nrow(x$mf) diff --git a/R/utils_model_info.R b/R/utils_model_info.R index 457907af1..7bf427308 100644 --- a/R/utils_model_info.R +++ b/R/utils_model_info.R @@ -56,7 +56,7 @@ "glm", "gee", "glmmTMB", "glmerMod", "merMod", "stanreg", "MixMod", "logistf", "bigglm", "brglm", "feglm", "geeglm", "glmm", "glmmadmb", "glmmPQL", "glmrob", "glmRob", "logitmfx", "logitor", "logitr", - "mixed", "mixor", "svyglm" + "mixed", "mixor", "svyglm", "glmgee" ) ) diff --git a/man/find_parameters.averaging.Rd b/man/find_parameters.averaging.Rd index f8ee33e19..7e853989e 100644 --- a/man/find_parameters.averaging.Rd +++ b/man/find_parameters.averaging.Rd @@ -2,6 +2,7 @@ % Please edit documentation in R/find_parameters_other.R \name{find_parameters.averaging} \alias{find_parameters.averaging} +\alias{find_parameters.glmgee} \alias{find_parameters.betareg} \alias{find_parameters.DirichletRegModel} \alias{find_parameters.mjoint} @@ -10,6 +11,13 @@ \usage{ \method{find_parameters}{averaging}(x, component = c("conditional", "full"), flatten = FALSE, ...) +\method{find_parameters}{glmgee}( + x, + component = c("all", "conditional", "dispersion"), + flatten = FALSE, + ... +) + \method{find_parameters}{betareg}( x, component = c("all", "conditional", "precision", "location", "distributional", diff --git a/man/get_parameters.betareg.Rd b/man/get_parameters.betareg.Rd index 009973916..7c4ff6159 100644 --- a/man/get_parameters.betareg.Rd +++ b/man/get_parameters.betareg.Rd @@ -2,6 +2,7 @@ % Please edit documentation in R/get_parameters_others.R \name{get_parameters.betareg} \alias{get_parameters.betareg} +\alias{get_parameters.glmgee} \alias{get_parameters.DirichletRegModel} \alias{get_parameters.averaging} \alias{get_parameters.glmx} @@ -17,6 +18,8 @@ ... ) +\method{get_parameters}{glmgee}(x, component = c("all", "conditional", "dispersion"), ...) + \method{get_parameters}{DirichletRegModel}( x, component = c("all", "conditional", "precision", "location", "distributional", diff --git a/tests/testthat/test-glmgee.R b/tests/testthat/test-glmgee.R new file mode 100644 index 000000000..e4fa5c0b2 --- /dev/null +++ b/tests/testthat/test-glmgee.R @@ -0,0 +1,107 @@ +skip_if_not_installed("glmtoolbox") + +data(spruces, package = "glmtoolbox") +m1 <- glmtoolbox::glmgee( + size ~ days + treat, + id = tree, + family = Gamma(log), + corstr = "AR-M-dependent(1)", + data = spruces +) + +test_that("model_info", { + expect_true(model_info(m1)$is_exponential) + expect_identical(model_info(m1)$family, "Gamma") +}) + +test_that("find_predictors", { + expect_identical(find_predictors(m1), list(conditional = c("days", "treat"))) + expect_identical( + find_predictors(m1, effects = "random"), + list(random = "tree") + ) + expect_identical( + find_predictors(m1, effects = "all", flatten = TRUE), + c("days", "treat", "tree") + ) +}) + +test_that("find_response", { + expect_identical(find_response(m1), "size") +}) + +test_that("get_response", { + expect_equal(get_response(m1), spruces$size, ignore_attr = TRUE) +}) + +test_that("find_random", { + expect_identical(find_random(m1), list(random = "tree")) +}) + +test_that("get_random", { + expect_equal(get_random(m1), spruces[, "tree", drop = FALSE], ignore_attr = TRUE) +}) + +test_that("get_predictors", { + expect_equal(get_predictors(m1), spruces[c("days", "treat")], tolerance = 1e-4) +}) + +test_that("link_inverse", { + expect_equal(link_inverse(m1)(0.2), 1.221403, tolerance = 1e-3) +}) + +test_that("link_fun", { + expect_equal(link_function(m1)(0.2), -1.609438, tolerance = 1e-3) +}) + +test_that("get_data", { + expect_identical(nrow(get_data(m1)), 1027L) + expect_named(get_data(m1), c("size", "days", "treat", "tree")) +}) + +test_that("find_formula", { + expect_length(find_formula(m1), 2) + expect_equal( + find_formula(m1), + list(conditional = size ~ days + treat, random = ~tree), + ignore_attr = TRUE + ) +}) + +test_that("find_terms", { + expect_identical( + find_terms(m1), + list(response = "size", conditional = c("days", "treat"), random = "tree") + ) +}) + +test_that("n_obs", { + expect_identical(n_obs(m1), 1027L) +}) + +test_that("find_parameters", { + expect_identical( + find_parameters(m1), + list( + conditional = c("(Intercept)", "days", "treatozone-enriched"), + dispersion = "Dispersion" + ) + ) + expect_identical( + find_parameters(m1, component = "conditional"), + list(conditional = c("(Intercept)", "days", "treatozone-enriched")) + ) + expect_identical(nrow(get_parameters(m1)), 4L) + expect_identical( + get_parameters(m1)$Parameter, + c("(Intercept)", "days", "treatozone-enriched", "Dispersion") + ) +}) + +test_that("is_multivariate", { + expect_false(is_multivariate(m1)) +}) + +test_that("find_statistic", { + expect_identical(find_statistic(m1), "z-statistic") +})