% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/stat-fit-deviations.R
\name{stat_fit_deviations}
\alias{stat_fit_deviations}
\alias{stat_fit_fitted}
\title{Residuals from model fit as segments}
% Both statistics accept the same named parameters, but in a different order:
% stat_fit_deviations() has `position` and `...` right after `geom`, whereas
% stat_fit_fitted() has `position` after `formula` and `...` last. Pass
% arguments by name rather than by position.
\usage{
stat_fit_deviations(
  mapping = NULL,
  data = NULL,
  geom = "segment",
  position = "identity",
  ...,
  method = "lm",
  method.args = list(),
  n.min = 2L,
  formula = NULL,
  na.rm = FALSE,
  orientation = NA,
  show.legend = FALSE,
  inherit.aes = TRUE
)

stat_fit_fitted(
  mapping = NULL,
  data = NULL,
  geom = "point",
  method = "lm",
  method.args = list(),
  n.min = 2L,
  formula = NULL,
  position = "identity",
  na.rm = FALSE,
  orientation = NA,
  show.legend = FALSE,
  inherit.aes = TRUE,
  ...
)
}
\arguments{
\item{mapping}{The aesthetic mapping, usually constructed with
\code{\link[ggplot2]{aes}}. Only needs to be set at the layer level if you
are overriding the plot defaults.}

\item{data}{A layer specific dataset - only needed if you want to override
the plot defaults.}

\item{geom}{The geometric object to use to display the data.}

\item{position}{The position adjustment to use for overlapping points on this
layer}

\item{...}{other arguments passed on to \code{\link[ggplot2]{layer}}. This
can include aesthetics whose values you want to set, not map. See
\code{\link[ggplot2]{layer}} for more details.}

\item{method}{function or character. If character, one of "lm", "rlm", "lqs"
or "rq", or the name of a function to be matched, possibly followed by the fit
function's \code{method} argument separated by a colon (e.g.
\code{"rq:br"}). Functions implementing methods must accept arguments to
parameters \code{formula}, \code{data}, \code{weights} and \code{method}. A
\code{fitted()} method must exist for the returned model fit object class.}

\item{method.args}{named list with additional arguments.}

\item{n.min}{integer. Minimum number of distinct values in the explanatory
variable (on the rhs of formula) for fitting to be attempted.}

\item{formula}{a "formula" object, written using aesthetic names instead of
the original variable names.}

\item{na.rm}{a logical indicating whether NA values should be stripped
before the computation proceeds.}

\item{orientation}{character Either "x" or "y" controlling the default for
\code{formula}.}

\item{show.legend}{logical. Should this layer be included in the legends?
\code{NA} includes if any aesthetics are mapped. \code{FALSE}, the default,
never includes, and \code{TRUE} always includes.}

\item{inherit.aes}{If \code{FALSE}, overrides the default aesthetics, rather
than combining with them. This is most useful for helper functions that
define both data and aesthetics and should not inherit behaviour from the
default plot specification, e.g. \code{\link[ggplot2]{borders}}.}
}
\description{
\code{stat_fit_deviations} fits a linear model and returns fitted values and
residuals ready to be plotted as segments.
}
\details{
This stat can be used to automatically highlight residuals as
  segments in a plot of a fitted model equation. This stat only returns the
  fitted values and observations; the prediction line and its confidence band
  need to be added separately to the plot when desired. Thus, to make sure
  that the same model formula is used in all plot layers, it is best to save
  the formula as an object and supply this object as argument to the different
  statistics.

  A ggplot statistic receives as data a data frame that is not the one passed
  as argument by the user, but instead a data frame with the variables mapped
  to aesthetics and NA values removed. In other words, it respects the
  grammar of graphics and consequently within the model \code{formula} names
  of aesthetics like \code{x} and \code{y} should be used instead of the
  original variable names. This helps ensure that the model is fitted to the
  same data as plotted in other layers.
}
\note{
In the case of \code{method = "rq"} quantiles are fixed at \code{tau =
  0.5} unless \code{method.args} has length > 0. Parameter \code{orientation}
  is redundant as it only affects the default for \code{formula} but is
  included for consistency with \code{ggplot2}.
}
\section{Computed variables}{
 For each group, a data frame with the same number of rows as the
  corresponding subset of \code{data}, containing six numeric variables.
  \describe{
  \item{x}{x coordinates of observations}
  \item{x.fitted}{x coordinates of fitted values}
  \item{y}{y coordinates of observations}
  \item{y.fitted}{y coordinates of fitted values}
  \item{weights}{the weights passed as input to \code{lm()}, \code{rlm()}, or
  \code{lmrob()}, using aesthetic weight. More generally the value returned by
  \code{weights()}}
  \item{robustness.weights}{the "weights" of the applied minimization
  criterion relative to those of OLS in \code{rlm()}, or \code{lmrob()}} }

  To explore the values returned by this statistic we suggest the use of
  \code{\link[gginnards]{geom_debug}}. An example is shown below, where one
  can also see in addition to the computed values the default mapping of the
  fitted values to aesthetics \code{xend} and \code{yend}.
}

\examples{
# Every plot below overlays three layers: the fitted line, the deviations
# (residuals drawn as segments) and the observations as points.

# generate artificial data: a cubic polynomial in x plus normal noise
library(MASS) # supplies rlm(), used in the robust regression examples

set.seed(4321) # make the random noise reproducible
x <- 1:100
y <- (x + x^2 + x^3) + rnorm(length(x), mean = 0, sd = mean(x^3) / 4)
my.data <- data.frame(x, y)

# plot residuals from linear model
ggplot(my.data, aes(x, y)) +
  geom_smooth(method = "lm", formula = y ~ x) +
  stat_fit_deviations(method = "lm", formula = y ~ x, colour = "red") +
  geom_point()

# plot residuals from linear model with y as explanatory variable
ggplot(my.data, aes(x, y)) +
  geom_smooth(method = "lm", formula = y ~ x, orientation = "y") +
  stat_fit_deviations(method = "lm", formula = x ~ y, colour = "red") +
  geom_point()

# as above using orientation, which sets the default for formula
ggplot(my.data, aes(x, y)) +
  geom_smooth(method = "lm", orientation = "y") +
  stat_fit_deviations(orientation = "y", colour = "red") +
  geom_point()

# both regressions and their deviations
ggplot(my.data, aes(x, y)) +
  geom_smooth(method = "lm") +
  stat_fit_deviations(colour = "blue") +
  geom_smooth(method = "lm", orientation = "y", colour = "red") +
  stat_fit_deviations(orientation = "y", colour = "red") +
  geom_point()

# give a name to a formula, so that the same model is used in all layers
my.formula <- y ~ poly(x, 3, raw = TRUE)

# plot linear regression
ggplot(my.data, aes(x, y)) +
  geom_smooth(method = "lm", formula = my.formula) +
  stat_fit_deviations(formula = my.formula, colour = "red") +
  geom_point()

# as above, passing the fit function itself instead of its name
ggplot(my.data, aes(x, y)) +
  geom_smooth(method = "lm", formula = my.formula) +
  stat_fit_deviations(formula = my.formula, method = stats::lm, colour = "red") +
  geom_point()

# plot robust regression
ggplot(my.data, aes(x, y)) +
  stat_smooth(method = "rlm", formula = my.formula) +
  stat_fit_deviations(formula = my.formula, method = "rlm", colour = "red") +
  geom_point()

# plot robust regression with robustness weights indicated by colour
# (one observation is turned into an outlier so that it gets down-weighted;
# the computed variable "weights" holds the prior weights, while the weights
# assigned by the robust fit are returned in "robustness.weights")
my.data.outlier <- my.data
my.data.outlier[6, "y"] <- my.data.outlier[6, "y"] * 10
ggplot(my.data.outlier, aes(x, y)) +
  stat_smooth(method = MASS::rlm, formula = my.formula) +
  stat_fit_deviations(formula = my.formula, method = "rlm",
                      mapping = aes(colour = after_stat(robustness.weights)),
                      show.legend = TRUE) +
  scale_color_gradient(low = "red", high = "blue", limits = c(0, 1),
                       guide = "colourbar") +
  geom_point()

# plot quantile regression (= median regression)
# with method = "rq" and empty method.args, tau is fixed at 0.5
ggplot(my.data, aes(x, y)) +
  stat_quantile(formula = my.formula, quantiles = 0.5) +
  stat_fit_deviations(formula = my.formula, method = "rq", colour = "red") +
  geom_point()

# plot quantile regression (= "quartile" regression)
# tau is passed through method.args to match the quantile drawn
ggplot(my.data, aes(x, y)) +
  stat_quantile(formula = my.formula, quantiles = 0.75) +
  stat_fit_deviations(formula = my.formula, colour = "red",
                      method = "rq", method.args = list(tau = 0.75)) +
  geom_point()

# inspecting the returned data with geom_debug()
# (these examples run only if package gginnards is installed)
gginnards.installed <- requireNamespace("gginnards", quietly = TRUE)

if (gginnards.installed)
  library(gginnards)

# plot, using geom_debug() to explore the after_stat data
if (gginnards.installed)
  ggplot(my.data, aes(x, y)) +
    geom_smooth(method = "lm", formula = my.formula) +
    stat_fit_deviations(formula = my.formula, geom = "debug") +
    geom_point()

# as above for the robust fit to the data containing an outlier
if (gginnards.installed)
  ggplot(my.data.outlier, aes(x, y)) +
    stat_smooth(method = MASS::rlm, formula = my.formula) +
    stat_fit_deviations(formula = my.formula, method = "rlm", geom = "debug") +
    geom_point()

}
\seealso{
Other ggplot statistics for model fits: 
\code{\link{stat_fit_augment}()},
\code{\link{stat_fit_glance}()},
\code{\link{stat_fit_residuals}()},
\code{\link{stat_fit_tb}()},
\code{\link{stat_fit_tidy}()}
}
\concept{ggplot statistics for model fits}
