From e974402f72cbda902bfc9efcb1737e8faa5124cc Mon Sep 17 00:00:00 2001 From: Daniel Date: Sun, 9 Feb 2025 20:54:21 +0100 Subject: [PATCH 1/3] Add `residualize_over_grid()` --- DESCRIPTION | 2 +- NAMESPACE | 3 + R/residualize_over_grid.R | 175 +++++++++++++++++++++++++++++++++++ man/residualize_over_grid.Rd | 70 ++++++++++++++ 4 files changed, 249 insertions(+), 1 deletion(-) create mode 100644 R/residualize_over_grid.R create mode 100644 man/residualize_over_grid.Rd diff --git a/DESCRIPTION b/DESCRIPTION index 2bb935c49..fec501fbd 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Type: Package Package: modelbased Title: Estimation of Model-Based Predictions, Contrasts and Means -Version: 0.9.0.7 +Version: 0.9.0.8 Authors@R: c(person(given = "Dominique", family = "Makowski", diff --git a/NAMESPACE b/NAMESPACE index 9a565566e..9742b3eb5 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -44,6 +44,8 @@ S3method(print_md,estimate_slopes) S3method(print_md,estimate_smooth) S3method(print_md,visualisation_matrix) S3method(reshape_grouplevel,estimate_grouplevel) +S3method(residualize_over_grid,data.frame) +S3method(residualize_over_grid,estimate_means) S3method(smoothing,data.frame) S3method(smoothing,numeric) S3method(standardize,estimate_contrasts) @@ -77,6 +79,7 @@ export(get_marginaltrends) export(print_html) export(print_md) export(reshape_grouplevel) +export(residualize_over_grid) export(smoothing) export(standardize) export(unstandardize) diff --git a/R/residualize_over_grid.R b/R/residualize_over_grid.R new file mode 100644 index 000000000..b92d0e166 --- /dev/null +++ b/R/residualize_over_grid.R @@ -0,0 +1,175 @@ +#' @title Compute partial residuals from a data grid +#' @name residualize_over_grid +#' +#' @description This function computes partial residuals based on a data grid, +#' where the data grid is usually a data frame from all combinations of factor +#' variables or certain values of numeric vectors. This data grid is usually used +#' as `newdata` argument in `predict()`, and can be created with +#' [`insight::get_datagrid()`]. +#' +#' @param grid A data frame representing the data grid, or an object of class +#' `estimate_means` or `estimate_predicted`, as returned by the different +#' `estimate_*()` functions. +#' @param model The model for which to compute partial residuals. The data grid +#' `grid` should match to predictors in the model. +#' @param predictor_name The name of the focal predictor, for which partial residuals +#' are computed. +#' @param protect_names Logical, if `TRUE`, preserves column names from the +#' `ggeffects` objects that is used as `grid`. +#' @param ... Currently not used. +#' +#' @section Partial Residuals: +#' For **generalized linear models** (glms), residualized scores are computed as +#' `inv.link(link(Y) + r)` where `Y` are the predicted values on the response +#' scale, and `r` are the *working* residuals. +#' +#' For (generalized) linear **mixed models**, the random effect are also +#' partialled out. +#' +#' @references +#' Fox J, Weisberg S. Visualizing Fit and Lack of Fit in Complex Regression +#' Models with Predictor Effect Plots and Partial Residuals. Journal of +#' Statistical Software 2018;87. +#' +#' @return A data frame with residuals for the focal predictor. +#' +#' @examplesIf requireNamespace("marginaleffects", quietly = TRUE) +#' set.seed(1234) +#' x1 <- rnorm(200) +#' x2 <- rnorm(200) +#' # quadratic relationship +#' y <- 2 * x1 + x1^2 + 4 * x2 + rnorm(200) +#' +#' d <- data.frame(x1, x2, y) +#' model <- lm(y ~ x1 + x2, data = d) +#' +#' pr <- estimate_means(model, c("x1", "x2")) +#' head(residualize_over_grid(pr, model)) +#' @export +residualize_over_grid <- function(grid, model, ...) { + UseMethod("residualize_over_grid") + +} + + +#' @rdname residualize_over_grid +#' @export +residualize_over_grid.data.frame <- function(grid, model, predictor_name, ...) { + + old_d <- insight::get_predictors(model) + fun_link <- insight::link_function(model) + inv_fun <- insight::link_inverse(model) + predicted <- grid[[predictor_name]] + grid[[predictor_name]] <- NULL + + is_fixed <- sapply(grid, function(x) length(unique(x))) == 1 + grid <- grid[, !is_fixed, drop = FALSE] + old_d <- old_d[, colnames(grid)[colnames(grid) %in% colnames(old_d)], drop = FALSE] + + if (!.is_grid(grid)) { + insight::format_error("Grid for partial residuals must be a fully crossed grid.") + } + + # for each var + best_match <- NULL + + for (p in colnames(old_d)) { + if (is.factor(old_d[[p]]) || is.logical(old_d[[p]]) || is.character(old_d[[p]])) { + grid[[p]] <- as.character(grid[[p]]) + old_d[[p]] <- as.character(old_d[[p]]) + } else { + grid[[p]] <- .validate_num(grid[[p]]) + } + + # if factor / logical / char in old data, find where it is equal + # if numeric in old data, find where it is closest + best_match <- .closest(old_d[[p]], grid[[p]], best_match = best_match) + } + + idx <- apply(best_match, 2, which) + idx <- sapply(idx, "[", 1) + + # extract working residuals + res <- .safe(stats::residuals(model, type = "working")) + + # if failed, and model linear, extract response residuals + if (is.null(res)) { + minfo <- insight::model_info(model) + if (minfo$is_linear) { + res <- .safe(insight::get_residuals(model, type = "response")) + } + } + + if (is.null(res)) { + insight::format_alert("Could not extract residuals.") + return(NULL) + } + + my_points <- grid[idx, , drop = FALSE] + my_points[[predictor_name]] <- inv_fun(fun_link(predicted[idx]) + res) # add errors + + my_points +} + + + +#' @rdname residualize_over_grid +#' @export +residualize_over_grid.estimate_means <- function(grid, model, ...) { + new_d <- as.data.frame(grid) + + relevant_columns <- unique(c( + attributes(grid)$trend, + attributes(grid)$contrast, + attributes(grid)$focal_terms, + attributes(grid)$coef_name + )) + + new_d <- new_d[colnames(new_d) %in% relevant_columns] + + residualize_over_grid(new_d, model, predictor_name = attributes(grid)$coef_name, ...) +} + + +# utilities -------------------------------------------------------------------- + + +.is_grid <- function(df) { + unq <- lapply(df, unique) + + if (prod(lengths(unq)) != nrow(df)) { + return(FALSE) + } + + df2 <- do.call(expand.grid, args = unq) + df2$..1 <- 1 + + res <- merge(df, df2, by = colnames(df), all = TRUE) + + sum(res$..1) == sum(df2$..1) +} + + +.closest <- function(x, target, best_match) { + if (is.numeric(x)) { + # AD <- outer(x, target, FUN = function(x, y) abs(x - y)) + AD <- abs(outer(x, target, FUN = `-`)) + idx <- apply(AD, 1, function(x) x == min(x)) + } else { + idx <- t(outer(x, target, FUN = `==`)) + } + + if (is.matrix(best_match)) { + idx <- idx & best_match + } + + idx +} + + +.validate_num <- function(x) { + if (!is.numeric(x)) { + x <- as.numeric(as.character(x)) + } + x +} diff --git a/man/residualize_over_grid.Rd b/man/residualize_over_grid.Rd new file mode 100644 index 000000000..cee9c3f94 --- /dev/null +++ b/man/residualize_over_grid.Rd @@ -0,0 +1,70 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/residualize_over_grid.R +\name{residualize_over_grid} +\alias{residualize_over_grid} +\alias{residualize_over_grid.data.frame} +\alias{residualize_over_grid.estimate_means} +\title{Compute partial residuals from a data grid} +\usage{ +residualize_over_grid(grid, model, ...) + +\method{residualize_over_grid}{data.frame}(grid, model, predictor_name, ...) + +\method{residualize_over_grid}{estimate_means}(grid, model, ...) +} +\arguments{ +\item{grid}{A data frame representing the data grid, or an object of class +\code{estimate_means} or \code{estimate_predicted}, as returned by the different +\verb{estimate_*()} functions.} + +\item{model}{The model for which to compute partial residuals. The data grid +\code{grid} should match to predictors in the model.} + +\item{...}{Currently not used.} + +\item{predictor_name}{The name of the focal predictor, for which partial residuals +are computed.} + +\item{protect_names}{Logical, if \code{TRUE}, preserves column names from the +\code{ggeffects} objects that is used as \code{grid}.} +} +\value{ +A data frame with residuals for the focal predictor. +} +\description{ +This function computes partial residuals based on a data grid, +where the data grid is usually a data frame from all combinations of factor +variables or certain values of numeric vectors. This data grid is usually used +as \code{newdata} argument in \code{predict()}, and can be created with +\code{\link[insight:get_datagrid]{insight::get_datagrid()}}. +} +\section{Partial Residuals}{ + +For \strong{generalized linear models} (glms), residualized scores are computed as +\code{inv.link(link(Y) + r)} where \code{Y} are the predicted values on the response +scale, and \code{r} are the \emph{working} residuals. + +For (generalized) linear \strong{mixed models}, the random effect are also +partialled out. +} + +\examples{ +\dontshow{if (requireNamespace("marginaleffects", quietly = TRUE)) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +set.seed(1234) +x1 <- rnorm(200) +x2 <- rnorm(200) +# quadratic relationship +y <- 2 * x1 + x1^2 + 4 * x2 + rnorm(200) + +d <- data.frame(x1, x2, y) +model <- lm(y ~ x1 + x2, data = d) + +pr <- estimate_means(model, c("x1", "x2")) +head(residualize_over_grid(pr, model)) +\dontshow{\}) # examplesIf} +} +\references{ +Fox J, Weisberg S. Visualizing Fit and Lack of Fit in Complex Regression +Models with Predictor Effect Plots and Partial Residuals. Journal of +Statistical Software 2018;87. +} From ff97ef2b5b16c49ef090bd28339e5fdbdb171edb Mon Sep 17 00:00:00 2001 From: Daniel Date: Sun, 9 Feb 2025 20:55:28 +0100 Subject: [PATCH 2/3] docs --- R/residualize_over_grid.R | 4 ---- man/residualize_over_grid.Rd | 6 ------ 2 files changed, 10 deletions(-) diff --git a/R/residualize_over_grid.R b/R/residualize_over_grid.R index b92d0e166..d42f1a9d2 100644 --- a/R/residualize_over_grid.R +++ b/R/residualize_over_grid.R @@ -14,8 +14,6 @@ #' `grid` should match to predictors in the model. #' @param predictor_name The name of the focal predictor, for which partial residuals #' are computed. -#' @param protect_names Logical, if `TRUE`, preserves column names from the -#' `ggeffects` objects that is used as `grid`. #' @param ... Currently not used. #' #' @section Partial Residuals: @@ -112,8 +110,6 @@ residualize_over_grid.data.frame <- function(grid, model, predictor_name, ...) { } - -#' @rdname residualize_over_grid #' @export residualize_over_grid.estimate_means <- function(grid, model, ...) { new_d <- as.data.frame(grid) diff --git a/man/residualize_over_grid.Rd b/man/residualize_over_grid.Rd index cee9c3f94..29455dcb1 100644 --- a/man/residualize_over_grid.Rd +++ b/man/residualize_over_grid.Rd @@ -3,14 +3,11 @@ \name{residualize_over_grid} \alias{residualize_over_grid} \alias{residualize_over_grid.data.frame} -\alias{residualize_over_grid.estimate_means} \title{Compute partial residuals from a data grid} \usage{ residualize_over_grid(grid, model, ...) \method{residualize_over_grid}{data.frame}(grid, model, predictor_name, ...) - -\method{residualize_over_grid}{estimate_means}(grid, model, ...) } \arguments{ \item{grid}{A data frame representing the data grid, or an object of class @@ -24,9 +21,6 @@ residualize_over_grid(grid, model, ...) \item{predictor_name}{The name of the focal predictor, for which partial residuals are computed.} - -\item{protect_names}{Logical, if \code{TRUE}, preserves column names from the -\code{ggeffects} objects that is used as \code{grid}.} } \value{ A data frame with residuals for the focal predictor. From 12d2711cd8753aed98321fcaf62b8338ec53c4d0 Mon Sep 17 00:00:00 2001 From: Daniel Date: Sun, 9 Feb 2025 21:37:07 +0100 Subject: [PATCH 3/3] plot --- R/visualisation_recipe.R | 2 + R/visualisation_recipe_internal.R | 51 +++++++++++++++++++ ...visualisation_recipe.estimate_predicted.Rd | 1 + 3 files changed, 54 insertions(+) diff --git a/R/visualisation_recipe.R b/R/visualisation_recipe.R index 67b57ab38..22b20ce2d 100644 --- a/R/visualisation_recipe.R +++ b/R/visualisation_recipe.R @@ -118,6 +118,7 @@ #' @export visualisation_recipe.estimate_predicted <- function(x, show_data = FALSE, + show_residuals = FALSE, point = NULL, line = NULL, pointrange = NULL, @@ -129,6 +130,7 @@ visualisation_recipe.estimate_predicted <- function(x, .visualization_recipe( x, show_data = show_data, + show_residuals = show_residuals, point = point, line = line, pointrange = pointrange, diff --git a/R/visualisation_recipe_internal.R b/R/visualisation_recipe_internal.R index 0395a35b5..e53a7d66e 100644 --- a/R/visualisation_recipe_internal.R +++ b/R/visualisation_recipe_internal.R @@ -182,6 +182,7 @@ #' @keywords internal .visualization_recipe <- function(x, show_data = TRUE, + show_residuals = FALSE, point = NULL, line = NULL, pointrange = NULL, @@ -219,6 +220,15 @@ } + # add residual data as next lowest layer + if (show_residuals) { + layers[[paste0("l", l)]] <- .visualization_recipe_residuals(x, aes) + # Update with additional args + if (!is.null(point)) layers[[paste0("l", l)]] <- utils::modifyList(layers[[paste0("l", l)]], point) + l <- l + 1 + } + + # intercept line for slopes ---------------------------------- if (inherits(x, "estimate_slopes")) { layers[[paste0("l", l)]] <- insight::compact_list(list( @@ -414,3 +424,44 @@ out } + + +# residuals ---------------------------------------------------------------- + + +#' @keywords internal +.visualization_recipe_residuals <- function(x, aes) { + model <- attributes(x)$model + residual_data <- residualize_over_grid(x, model) + + # Default changes for binomial models + shape <- 16 + stroke <- 0 + if (insight::model_info(model)$is_binomial) { + shape <- "|" + stroke <- 1 + } + + out <- list( + geom = "point", + data = residual_data, + aes = list( + y = y, + x = aes$x, + color = aes$color, + alpha = aes$alpha + ), + height = 0, + shape = shape, + stroke = stroke + ) + + # set default alpha, if not mapped by aes + if (is.null(aes$alpha)) { + out$alpha <- 1 / 3 + } else { + out$alpha <- NULL + } + + out +} diff --git a/man/visualisation_recipe.estimate_predicted.Rd b/man/visualisation_recipe.estimate_predicted.Rd index 92f6f7226..4a5b0c525 100644 --- a/man/visualisation_recipe.estimate_predicted.Rd +++ b/man/visualisation_recipe.estimate_predicted.Rd @@ -9,6 +9,7 @@ \method{visualisation_recipe}{estimate_predicted}( x, show_data = FALSE, + show_residuals = FALSE, point = NULL, line = NULL, pointrange = NULL,