% 2013-06-12 A. Papritz
% R CMD Rdconv -t html -o bla.html cv.georob.Rd ; open bla.html; R CMD Rd2pdf --force cv.georob.Rd; 

\encoding{macintosh}
\name{cv.georob}
\alias{cv.georob}

\title{Cross-Validating a Spatial Linear Model Fitted by \code{georob}}

\description{
  This function assesses the goodness-of-fit of a spatial linear model by
  \var{K}-fold cross-validation.  In more detail, the model is re-fitted
  \var{K} times by robust (or Gaussian) REML, excluding each time
  \var{1/K}th of the data.  The re-fitted models are used to compute robust
  (or customary) external kriging predictions for the omitted observations.
  If the response variable is log-transformed then the kriging predictions
  can be optionally transformed back to the original scale of the
  measurements.  S3 methods for evaluating and plotting diagnostic summaries
  of the cross-validation errors are described for the function
  \code{\link{validate.predictions}}.
  }
  
\usage{
\method{cv}{georob}(object, formula = NULL, subset = NULL, nset = 10, 
    seed = NULL, sets = NULL, duplicates.in.same.set = TRUE, 
    re.estimate = TRUE, param = object[["param"]], 
    fit.param = object[["initial.objects"]][["fit.param"]], 
    return.fit = FALSE, reduced.output = TRUE, lgn = FALSE, 
    ncores = min(nset, detectCores()), verbose = 0, ...)
}

\arguments{

  \item{object}{an object of class \code{"georob"}, see
  \code{\link{georobObject}}.}

  \item{formula}{an optional formula for the regression model passed by
  \code{\link[stats]{update}} to \code{\link{georob}}, see \emph{Details}.}
  
  
  \item{subset}{an optional vector specifying a subset of observations
    to be used in the fitting process, see \emph{Details}.}
    
    
  \item{nset}{positive integer defining the number \var{K} of subsets 
  into which the data set is partitioned (default: \code{nset = 10}).}
  
  \item{seed}{optional integer seed to initialize random number generation,
  see \code{\link[base]{set.seed}}. Ignored if \code{sets} is non-\code{NULL}.}
  
  \item{sets}{an optional vector of the same length as the response vector
  of the fitted model and with positive integers taking values in
  \eqn{(1,2,\ldots,K)}, defining in this way the \eqn{K} subsets into which
  the data set is split.  If \code{sets = NULL} (default) the partition is
  randomly generated by \code{\link[base]{sample}} (using possibly
  \code{seed}).}
  
  \item{duplicates.in.same.set}{logical controlling whether replicated
  observations at a given location are assigned to the same subset when
  partitioning the data (default \code{TRUE}).}
  
  \item{re.estimate}{logical controlling whether the model is re-fitted to
  the reduced data sets before computing the kriging predictions
  (\code{TRUE}, default) or whether the model passed in \code{object} is
  used to compute the predictions for the omitted observations, see
  \emph{Details}.}
  
  \item{param}{an optional named numeric vector or a matrix or data frame
  with variogram parameters passed by \code{\link[stats]{update}} to
  \code{\link{georob}}, see \emph{Details}.  If \code{param} is a matrix
  (or a data frame) then it must have \code{nset} rows and
  \code{length(object[["param"]])} columns with initial values of variogram
  parameters for the \code{nset} cross-validation sets and
  \code{colnames(param)} must match \code{names(object[["param"]])}.}
  
  \item{fit.param}{an optional named logical vector or a matrix or data
  frame defining which variogram parameters should be adjusted when passed
  by \code{\link[stats]{update}} to \code{\link{georob}}, see
  \emph{Details}.  If \code{fit.param} is a matrix (or a data frame) then
  it must have \code{nset} rows and \code{length(object[["param"]])} columns
  with variogram parameter fitting flags for the \code{nset}
  cross-validation sets and \code{colnames(fit.param)} must match
  \code{names(object[["param"]])}.}
  
  \item{return.fit}{logical controlling whether information about the fit
  should be returned when re-estimating the model with the reduced data
  sets (default \code{FALSE}).}
  
  \item{reduced.output}{logical controlling whether the complete fitted
  model objects, fitted to the reduced data sets, are returned
  (\code{FALSE}) or only some components (\code{TRUE}, default, see
  \emph{Value}).  Ignored if \code{return.fit = FALSE}.}
  
  \item{lgn}{logical controlling whether kriging predictions of a
  log-transformed response should be transformed back to the original scale
  of the measurements (default \code{FALSE}).}
  
  \item{ncores}{positive integer controlling how many cores are used for
  parallelized computations, see \emph{Details}.}
  
  \item{verbose}{non-negative integer controlling logging of diagnostic
  messages to the console during model fitting.  Passed by
  \code{\link[stats]{update}} to \code{\link{georob}}, see \emph{Details}.}
    
    
  \item{\dots}{additional arguments passed by \code{\link[stats]{update}}
  to \code{\link{georob}}, see \emph{Details}.}

}

\details{
  \code{cv.georob} uses the package \pkg{parallel} for parallelized
  cross-validation.  By default, the function uses \eqn{K} CPUs
  but not more than are physically available (as returned by 
  \code{\link[parallel]{detectCores}}).
  
  \code{cv.georob} uses the function \code{\link[stats]{update}} to
  re-estimate the model with the reduced data sets.  Therefore, any
  argument accepted by \code{\link{georob}} can be changed when
  re-fitting the model.  Some of them (e.g. \code{formula} and
  \code{subset}) are explicit arguments of \code{cv.georob}, but
  the remaining ones can also be passed to the function.
  
  Practitioners in geostatistics commonly cross-validate a fitted model
  without re-estimating the model parameters with the reduced data sets.
  This is clearly an unsound practice (see Hastie et al., 2009, sec.
  7.10).  Therefore, the argument \code{re.estimate} should always be set
  to \code{TRUE}.  The alternative is provided only for historic reasons.

}

\value{
  An object of class \code{cv.georob}, which is a list with the two
  components \code{pred} and \code{fit}.  
  
  \code{pred} is a data frame with the coordinates and the
  cross-validation prediction results with the following variables:
  
  \item{subset}{an integer vector defining to which of the \eqn{K} subsets
  an observation was assigned.}
  
  \item{data}{the values of the (possibly log-transformed) response.}
  
  \item{pred}{the kriging predictions.}
  
  \item{se}{the kriging standard errors.}
  
  If \code{lgn = TRUE} then \code{pred} has the additional variables:
  
  \item{lgn.data}{the untransformed response.}
  
  \item{lgn.pred}{the unbiasedly back-transformed predictions of a
  log-transformed response.}
  
  \item{lgn.se}{the kriging standard errors of the back-transformed
  predictions of a log-transformed response.}
  
  The second component \code{fit} contains either the full outputs of
  \code{georob}, fitted for the \eqn{K} reduced data sets
  (\code{reduced.output = FALSE}), or \eqn{K} lists with the components
  \code{tuning.psi}, \code{converged}, \cr \code{convergence.code},
  \code{gradient}, \code{param}, \code{aniso$aniso}, \code{coefficients}
  along with the standard errors of
  \eqn{\widehat{\mbox{\boldmath$\beta$\unboldmath}}}{hat\beta}, see
  \code{\link{georobObject}}.
 
}


\references{
Hastie, T., Tibshirani, R. and Friedman, J. (2009) \emph{The Elements of
Statistical Learning; Data Mining, Inference and Prediction}.  New York:
Springer-Verlag.
}

\author{
   Andreas Papritz \email{andreas.papritz@env.ethz.ch}}

\seealso{
  \code{\link{validate.predictions}} for computing statistics of the cross-validation errors;
  \code{\link{georob}} for (robust) fitting of spatial linear models;
  \code{\link{georobObject}} for a description of the class \code{georob};
  \code{\link{predict.georob}} for computing robust kriging predictions.
}


\examples{
\dontrun{
data( meuse )

r.logzn <- georob(log(zinc) ~ sqrt(dist), data = meuse, locations = ~ x + y,
    variogram.model = "exponential",
    param = c( variance = 0.15, nugget = 0.05, scale = 200 ),
    tuning.psi = 1)

r.logzn.cv.1 <- cv(r.logzn, seed = 1, lgn = TRUE )
r.logzn.cv.2 <- cv(r.logzn, formula = .~. + ffreq, seed = 1, lgn = TRUE )

plot(r.logzn.cv.1, type = "bs")
plot(r.logzn.cv.2, type = "bs", add = TRUE, col = "red")

legend("topright", lty = 1, col = c( "black", "red"), bty = "n",
    legend = c("log(Zn) ~ sqrt(dist)", "log(Zn) ~ sqrt(dist) + ffreq"))}
}

\keyword{models}
\keyword{spatial}
