\name{simulate.bic.glm}
\alias{simulate.bic.glm}
\title{
  A "simulate" method for a \code{BMA::bic.glm}
  object
}
\description{
  Simulate predictions for \code{newdata} for a
  model of class \code{\link[BMA]{bic.glm}}.  

  NOTES: The \strong{stats} package has a 
  \code{\link[stats]{simulate}} method for 
  "\code{lm}" objects which is used for 
  \code{\link{lm}} and \code{\link{glm}} 
  objects.  This \code{simulate.bic.glm} 
  function differs from the 
  \code{stats::\link[stats]{simulate}} function 
  in the same two fundamental and important ways 
  as the \code{simulate.glm} function:  

  \enumerate{
    \item{
      \code{stats::\link[stats]{simulate}}
      returns simulated data consistent with the 
      model fit assuming the estimated model 
      parameters are true and exact, i.e., 
      ignoring the uncertainty in parameter 
      estimation.  Thus, if \code{family = 
      poisson}, 
      \code{stats::\link[stats]{simulate}}
      returns nonnegative integers.  
    
      By contrast, the \code{simulate.bic.glm}
      function documented here optionally returns 
      simulated coefficients (\code{coef}) plus 
      simulated values on the scale of the 
      \code{link} and / or the \code{response} 
      (the inverse link applied to the simulated 
      linear predictors), but currently \emph{NOT} 
      pseudo-random observations drawn from the 
      distribution of the response.  
    }
    \item{ 
      The \code{simulate.bic.glm} function 
      documented here also accepts an optional 
      \code{newdata} argument, not accepted by 
      \code{stats::\link[stats]{simulate}}.  The 
      \code{stats::\link[stats]{simulate}} 
      function only returns simulated values for 
      the cases in the training set with no 
      possibilities for use for different sets 
      of conditions.  
    }
  }  
}
\usage{
\method{simulate}{bic.glm}(object, nsim = 1, 
    seed = NULL, newdata=NULL, 
    type = c("coef", "link", "response"), ...)
}
\arguments{
  \item{object}{
    an object representing a fitted model 
    of class \code{\link[BMA]{bic.glm}}.  
  }
  \item{nsim}{
    	number of response vectors to simulate. 
    	Defaults to 1.
  }
  \item{seed}{
    Argument passed as the first argument to 
    \code{\link{set.seed}} if not NULL.  
  }
  \item{newdata}{
    optionally, a \code{\link{data.frame}} 
    in which to look for variables with 
    which to predict. If omitted, predictors 
    used in fitting are used.  
  }
  \item{type}{
    the type of simulations required. 
    \describe{
      \item{coef}{
        \code{type = "coef"} returns 
        pseudo-random numbers generated by  
        \code{mvtnorm::\link[mvtnorm]{rmvnorm}} 
        with \code{mean} = \code{\link{coef}} 
        and \code{sigma} = \code{\link{vcov}} 
        for the component of the BMA mixture
        randomly selected for each simulation.  
        (Obviously, this does not use 
        \code{newdata}.)
      }
      \item{link}{
        \code{type='link'} returns simulations 
        on the scale of the linear predictors 
        using \code{\link[mvtnorm]{rmvnorm}} 
        applied  to randomly selected components 
        of the mixture with \code{mean} = 
        \code{\link{coef}} and \code{sigma} = 
        \code{\link{vcov}} for that component.  
        For a default binomial model, these are 
        log-odds (probabilities on the logit 
        scale).
      }
      \item{response}{
        \code{object[['linkinv']]} of \code{
        type = 'link'}.  For a binomial model, 
        these are predicted probabilities.  
      }
    }
  }
  \item{...}{
    further arguments passed to or from other
    methods.
  }
}
\details{
  1.  Save current \code{seed} and optionally set 
  it using code copied from 
  \code{stats:::simulate.lm}.  
  
  2.  \code{postprob <- object[['postprob']]; 
  x <- object[['x']]; y <- object[['y']]; 
  mle <- object[['mle']]; 
  linkinv <- object[['linkinv']]}.
  
  3.  \code{cl <- as.list(object[['call']]);  
  wt <- cl[['wt']]; 
  fam <- cl[['glm.family']]}

  4.  \code{if(is.null(newdata))newdata <- x} 
  else ensure that all levels of factors of 
  \code{newdata} match \code{x}.
  
  5.  \code{xMat <- model.matrix(~., x);  
  newMat <- model.matrix(~., newdata)}
  
  6.  \code{nComponents <- length(postprob);  
  nobs <- NROW(newdata)}
  
  7.  \code{sims <- matrix(NA, nobs, nsim)}
  
  8.  \code{rmdl <- sample(1:nComponents, nsim,
  TRUE, postprob)}
  
  9.  \code{for(Comp in 1:nComponents){
  nsimComp <- sum(rmdl==Comp); 
  refitComp <- glm.fit(xMat[, mle[Comp,]!=0], y, 
    wt, mle[Comp, mle[Comp,]!=0], family=fam); 
  simCoef <- mvtnorm::rmvnorm(nsimComp, 
    coef(refitComp), vcov(refitComp)); 
  sims[, rmdl==Comp] <- tcrossprod(newMat[, 
    mle[Comp,]!=0], simCoef) }}
  
  10.  If \code{length(type)} == 1:  return a
  \code{\link{data.frame}} with one column for 
  each desired simulation, consistent with the 
  behavior of the generic \code{\link{simulate}}
  applied to objects of class \code{lm} or 
  \code{glm}.  Otherwise, return a list of 
  \code{\link{data.frame}}s of the desired types.  
}
\value{
  Returns either a \code{\link{data.frame}} or a
  list of \code{\link{data.frame}}s depending on
  'type':  
  
  \item{\code{coef}}{
    a \code{\link{data.frame}} with \code{nsim} 
    columns and one row for each variable in the max
    model.  Values are non-zero for variables in the 
    model in the BMA mixture selected for that 
    simulation.  The non-zero values are generated 
    using \code{mvtnorm::\link[mvtnorm]{rmvnorm}}
    with  mean = \code{\link{coef}} and covariance 
    matrix = \code{\link{vcov}} of the model fit 
    to the subset of variables in that component
    model.  
  }
  \item{\code{link}}{
    a \code{\link{data.frame}} with \code{nsim} 
    columns of \code{nobs} values each giving the
    simulations on the \code{link} scale for each
    row in \code{newdata} (or the training set if
    \code{newdata} is not provided).  
  }
  \item{\code{response}}{
    a \code{\link{data.frame}} with \code{nsim}
    columns of \code{nobs} values each giving the
    simulations on the \code{response} scale, 
    being \code{linkinv} of the simulations on 
    the \code{link} scale.  
  }
  \item{if length(type)>1}{
    a list with simulations on the desired scales.  
  }
  
  The value also has an attribute "\code{seed}". 
  If argument \code{seed} is NULL, the attribute 
  is the value of \code{\link{.Random.seed}} 
  before the simulation started.  Otherwise it 
  is the value of the argument with a "kind"
  attribute with value \code{as.list(RNGkind())}.  
  
  NOTE:  This function currently may not work
  with a model fit that involves a multivariate 
  \code{link} or \code{response}.  
}
\author{
  Spencer Graves
}
\seealso{
  \code{\link[stats]{simulate}}, 
  \code{\link{simulate.glm}}, 
  \code{\link[BMA]{bic.glm}}, 
  \code{\link[BMA]{predict.bic.glm}}, 
  \code{\link{set.seed}}, 
  \code{\link[mvtnorm]{rmvnorm}}
}
%\references{}
\examples{
library(BMA)
library(mvtnorm)
##
## 1.  a factor and a numeric 
##
# Training predictors:  a 3-level factor x crossed 
# with a numeric x1.  The response 1:6 is passed 
# separately to bic.glm.  
PoisReg2 <- data.frame(
  x=factor(rep(0:2, 2)), x1=rep(1:2, each=3))
bicGLM2 <- bic.glm(PoisReg2, y=1:6, poisson)

newDat2 <- data.frame(
  x=factor(rep(c(0, 2), 2), levels=0:2), 
  x1=3:6)
# NOTE:  Force newDat2['x'] to have the same levels
# as PoisReg2['x']

# Five simulations for the first three rows 
# of newDat2
bicGLMsim2n <- simulate(bicGLM2, nsim=5, seed=2,
  newdata=newDat2[1:3,])

##
## 2.  One variable:  BMA returns
##     a mixture of constant & linear models
##
PoisRegDat <- data.frame(x=1:2, y=c(5, 10))
# Fit via the data.frame interface:  predictors 
# as a data.frame, response as a vector
bicGLMex <- bic.glm(PoisRegDat['x'], 
                     PoisRegDat[, 'y'], poisson)
# Print the 'postprob' component;  it is reused 
# below as the sampling probabilities in the 
# manual check of section 2a
(postprob <- bicGLMex[['postprob']])
# Print the 'mle' component of the fit
bicGLMex['mle']

# Simulate for the model data 
bicGLMsim <- simulate(bicGLMex, nsim=2, seed=1)  

# Simulate for new data
# (row names 'f3', 'f4' label the new cases)
newDat <- data.frame(x=3:4, 
      row.names=paste0('f', 3:4))
bicGLMsin <- simulate(bicGLMex, nsim=3, seed=2, 
                      newdata=newDat)
                      
# Refit with bic.glm.matrix and confirm 
# that simulate returns the same answers

bicGLMat <- bic.glm(as.matrix(PoisRegDat['x']), 
                         PoisRegDat[, 'y'], poisson)
# Same nsim, seed and newdata as for bicGLMsin, 
# so the results can be compared directly
bicGLMatsim <- simulate(bicGLMat, nsim=3, seed=2, 
                      newdata=newDat)
\dontshow{stopifnot(}                      
all.equal(bicGLMsin, bicGLMatsim)                      
\dontshow{)}

# The same problem using bic.glm.formula                  
bicGLMfmla <- bic.glm(y ~ x, PoisRegDat, poisson)
# Again the same nsim, seed and newdata 
# as for bicGLMsin
bicGLMfmlsim <- simulate(bicGLMfmla, nsim=3, seed=2, 
                      newdata=newDat)
\dontshow{stopifnot(}                      
all.equal(bicGLMsin, bicGLMfmlsim)                      
\dontshow{)}
                      
##
## 2a.  Compute the correct answers manually 
##
# Fit the two candidate models directly with glm:  
# linear in x (GLMex1) and constant (GLMex0)
GLMex1 <- glm(y~x, poisson, PoisRegDat)
GLMex0 <- glm(y~1, poisson, PoisRegDat)

postProb <- bicGLMfmla$postprob
nComp <- length(postProb)
# Design matrix (intercept plus x) for the new cases
newMat <- model.matrix(~., newDat)
# Same seed as used for bicGLMsin, so the random 
# choice of component for each of the 3 simulations 
# can be reproduced.  
# NOTE:  the sampling probabilities below are 
# 'postprob', saved from bicGLMex in section 2, 
# not 'postProb' defined just above.  
set.seed(2)
(rmdl <- sample(1:nComp, 3, TRUE, 
          postprob))
# One row per new case, one column per simulation
GLMsim. <- matrix(NA, 2, 3)
dimnames(GLMsim.) <- list(
  rownames(newMat), 
  paste0('sim_', 1:3) )
          
# NOTE(review):  the counts 2 and 1 below appear to 
# assume that rmdl selects component 1 twice and 
# component 2 once -- confirm from the printed rmdl.  
sim1 <- mvtnorm::rmvnorm(2, coef(GLMex1), 
                         vcov(GLMex1))
sim0 <- mvtnorm::rmvnorm(1, coef(GLMex0), 
                         vcov(GLMex0))
# Linear predictors for the new cases:  full design 
# matrix for the linear model, intercept column only 
# for the constant model
GLMsim.[, rmdl==1] <- tcrossprod(newMat, sim1)
GLMsim.[, rmdl==2] <- tcrossprod(
          newMat[, 1, drop=FALSE], sim0)
                      
# Compare with the second component of the list 
# returned by simulate
\dontshow{stopifnot(}
all.equal(bicGLMsin[[2]], data.frame(GLMsim.), 
    tolerance=4*sqrt(.Machine$double.eps))
# tcrossprod numeric precision is mediocre 
# for the constant model in this example.  
\dontshow{)}
}
\keyword{datagen}
