% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/xgrove.R
\name{sgtree}
\alias{sgtree}
\title{Surrogate trees}
\usage{
sgtree(model, data, maxdeps = 1:8, cparam = 0, pfun = NULL, ...)
}
\arguments{
\item{model}{A model with corresponding predict function that returns numeric values.}

\item{data}{Data that must not (!) contain the target variable.}

\item{maxdeps}{Sequence of integers: Maximum depth of the trees.}

\item{cparam}{Complexity parameter for growing the trees.}

\item{pfun}{Optional predict function \code{function(model, data)} returning a real number. Default is the \code{predict()} method of the \code{model}.}

\item{...}{Further arguments to be passed to \code{\link[rpart]{rpart.control}} or the \code{predict()} method of the \code{model}.}
}
\value{
List of the results:

\item{explanation}{Matrix containing tree sizes, rules, explainability \eqn{{\Upsilon}} and the correlation between the predictions of the explanation and the true model.}

\item{rules}{List of rules for each tree.}

\item{model}{List of the \code{rpart} models.}
}
\description{
Compute surrogate trees of different depth to explain a predictive machine learning model and analyze complexity vs. explanatory power.
}
\details{
For each maximum depth in \code{maxdeps} a surrogate tree is grown using \code{\link[rpart]{rpart}} on \code{data} with the predictions of the \code{model} as target variable.
Note that \code{data} must not contain the original target variable!
}
\examples{
library(randomForest)
library(pdp)
data(boston)
set.seed(42)
rf    <- randomForest(cmedv ~ ., data = boston)
data  <- boston[,-3] # remove target variable
maxds <- 1:7
st    <- sgtree(rf, data, maxds)
st
# rules for tree of depth 3
st$rules[["3"]]
# plot tree of depth 3
rpart.plot::rpart.plot(st$model[["3"]])

}
\references{
\itemize{
    \item {Szepannek, G. and Laabs, B.H. (2023): Can't see the forest for the trees -- analyzing groves to explain random forests,
           Behaviormetrika, submitted}.
    \item {Szepannek, G. and Luebke, K. (2023): How much do we see? On the explainability of partial dependence plots for credit risk scoring,
           Argumenta Oeconomica 50, DOI: 10.15611/aoe.2023.1.07}.
  }
}
\author{
\email{gero.szepannek@web.de}
}
