% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/SimplexPath.R
\name{simplex_path_data}
\alias{simplex_path_data}
\title{Prepare data for visualising change in response across points in the simplex space}
\usage{
simplex_path_data(starts, ends, prop, add_var = list(), prediction = TRUE, ...)
}
\arguments{
\item{starts}{A data-frame specifying the starting proportions of the
compositional variables.
If a model object is specified then this data should contain all the
variables present in the model object including any additional non-compositional variables.
If a coefficient vector is specified then the data should contain the same
number of columns as the number of elements in the coefficient vector, and a
one-to-one positional mapping would be assumed between the data columns and
the elements of the coefficient vector.}

\item{ends}{A data-frame specifying the ending proportions of the
compositional variables.
If a model object is specified then this data should contain all the
variables present in the model object including any additional non-compositional variables.
If a coefficient vector is specified then the data should contain the same
number of columns as the number of elements in the coefficient vector, and a
one-to-one positional mapping would be assumed between the data columns and
the elements of the coefficient vector.}

\item{prop}{A vector of column names identifying the columns containing the
variable proportions (i.e., compositional columns) in the data.}

\item{add_var}{A list or data-frame specifying values for additional variables
in the model other than the proportions (i.e. not part of the
simplex design).
This could be useful for comparing the predictions across
different values for a non-compositional variable.
If specified as a list, it will be expanded to show a plot
for each unique combination of values specified, while if specified
as a data-frame, one plot would be generated for each row in the
data.}

\item{prediction}{A logical value indicating whether to pass the final data
to the \code{\link{add_prediction}} function and append the
predictions to the data. Default value is \code{TRUE}, but
often it would be desirable to make additional changes to
the data before making any predictions, so the user can set this to
\code{FALSE} and manually call the \code{\link{add_prediction}}
function.}

\item{...}{
  Arguments passed on to \code{\link[=add_prediction]{add_prediction}}
  \describe{
    \item{\code{model}}{A regression model object which will be used to make predictions
for the observations in \code{data}. Will override \code{coefficients}
if specified.}
    \item{\code{coefficients}}{If a regression model is not available (or can't be fit in R),
the regression coefficients from a model fit in some other
language can be used to calculate predictions. However, the
user would have to ensure there's an appropriate one-to-one
positional mapping between the data columns and the
coefficient values. Further, they would also have to provide
a variance-covariance matrix of the coefficients in the \code{vcov}
parameter if they want the associated CI for the prediction;
otherwise it would not be possible to calculate confidence/prediction
intervals using this method.}
    \item{\code{vcov}}{If regression coefficients are specified, then the variance-covariance
matrix of the coefficients can be specified here to calculate the
associated confidence interval around each prediction. Otherwise,
no confidence intervals would be returned. Ensure that
\code{coefficients} and \code{vcov} have the same positional mapping with the data.}
    \item{\code{coeff_cols}}{If \code{coefficients} are specified and a one-to-one positional
mapping between the data columns and the coefficient vector is
not present, a character string or numeric index can be specified
here to reorder the data columns and match the corresponding
coefficient value to the respective data column. See the
"Use model coefficients for prediction" section in examples.}
    \item{\code{conf.level}}{The confidence level for calculating confidence/prediction
intervals. Default is 0.95.}
    \item{\code{interval}}{Type of interval to calculate:
\describe{
  \item{"none" (default)}{No interval to be calculated.}
  \item{"confidence"}{Calculate a confidence interval.}
  \item{"prediction"}{Calculate a prediction interval.}
}}
  }}
}
\value{
A data frame with the following columns appended at the end
 \describe{
   \item{.InterpConst}{The value of the interpolation constant for creating
                       the intermediate compositions between the start and end compositions.}
   \item{.Group}{An identifier column to discern between the different curves.}
   \item{.add_str_ID}{An identifier column for grouping the cartesian product
                      of all additional columns specified in the \code{add_var}
                      parameter (if \code{add_var} is specified).}
   \item{.Pred}{The predicted response for each observation.}
   \item{.Lower}{The lower limit of the prediction/confidence interval for each observation.}
   \item{.Upper}{The upper limit of the prediction/confidence interval for each observation.}
 }
}
\description{
This is the helper function to prepare the underlying data for visualising
the change in a response variable between two points in a simplex space. The
two points specified by the \code{starts} and \code{ends} parameters are joined by a
straight line across the simplex space and the response is predicted for the
starting, ending and intermediate communities along this line. The uncertainty
associated with these predictions is also returned. The output of this function
can be passed to the \code{\link{simplex_path_plot}} function to visualise the
change in response.
}
\examples{
library(DImodels)

## Load data
data(sim2)

## Fit model
mod <- glm(response ~ (p1 + p2 + p3 + p4)^2 + 0, data = sim2)

## Create data for visualising change in response as we move from
## a community dominated by 70\% of one species to a monoculture of
## the same species
head(simplex_path_data(starts = sim2[c(1, 5, 9, 13), 3:6],
                       ends = sim2[c(48, 52, 56, 60), 3:6],
                       prop = c("p1", "p2", "p3", "p4"),
                       model = mod))

## Create data for visualising change in response as we move from
## the centroid mixture to each monoculture
## If either of starts or ends has only one row, then it'll be recycled
## to match the number of rows in the other
## Notice starts has only one row here, but will be recycled to have 4
## since ends has 4 rows
head(simplex_path_data(starts = sim2[c(18),3:6],
                       ends = sim2[c(48, 52, 56, 60),3:6],
                       prop = c("p1", "p2", "p3", "p4"),
                       model = mod))

## Changing the confidence level for the prediction interval
## Use the `conf.level` parameter
head(simplex_path_data(starts = sim2[c(18), 3:6],
                       ends = sim2[c(48, 52, 56, 60),3:6],
                       prop = c("p1", "p2", "p3", "p4"),
                       model = mod, conf.level = 0.99))

## Adding additional variables to the data using `add_var`
## Notice the new .add_str_ID column in the output
sim2$block <- as.numeric(sim2$block)
new_mod <- update(mod, ~ . + block, data = sim2)
head(simplex_path_data(starts = sim2[c(18), 3:6],
                       ends = sim2[c(48, 52, 56, 60), 3:6],
                       prop = c("p1", "p2", "p3", "p4"),
                       model = new_mod, conf.level = 0.99,
                       add_var = list("block" = c(1, 2))))

## Use prediction = FALSE to get the raw data structure without predictions
out_data <- simplex_path_data(starts = sim2[c(18), 3:6],
                              ends = sim2[c(48, 52, 56, 60), 3:6],
                              prop = c("p1", "p2", "p3", "p4"),
                              model = new_mod,
                              prediction = FALSE)
head(out_data)
## Manually add the block variable required by `new_mod`
out_data$block = 3
## Call `add_prediction` to get the predictions
head(add_prediction(data = out_data, model = new_mod, interval = "conf"))
}
