% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/clustMD_UserFunctions.R
\name{clustMDparallel}
\alias{clustMDparallel}
\title{Run multiple clustMD models in parallel}
\usage{
clustMDparallel(X, CnsIndx, OrdIndx, G, models, Nnorms, MaxIter, store.params,
  scale, startCL = "hc_mclust", Ncores = NULL, autoStop = FALSE,
  ma.band = 50, stop.tol = NA)
}
\arguments{
\item{X}{a data matrix where the variables are ordered so that the 
continuous variables come first, the binary (coded 1 and 2) and ordinal
variables (coded 1, 2,...) come second and the nominal variables 
(coded 1, 2,...) are in last position.}

\item{CnsIndx}{the number of continuous variables in the data set.}

\item{OrdIndx}{the sum of the number of continuous, binary and ordinal 
variables in the data set.}

\item{G}{a vector containing the numbers of mixture components to be fitted.}

\item{models}{a vector of strings indicating which clustMD models are to be
fitted. Each element may be one of: \code{EII, VII, EEI, VEI, EVI, VVI} or 
\code{BD}.}

\item{Nnorms}{the number of Monte Carlo samples to be used for the 
intractable E-step in the presence of nominal data.}

\item{MaxIter}{the maximum number of iterations for which the (MC)EM 
algorithm should run.}

\item{store.params}{a logical variable indicating if the parameter estimates
at each iteration should be saved and returned by the \code{clustMD}
function.}

\item{scale}{a logical variable indicating if the continuous variables 
should be standardised.}

\item{startCL}{a string indicating which clustering method should be used to
initialise the (MC)EM algorithm. This may be one of "kmeans" (K means 
clustering), "hclust" (hierarchical clustering), "mclust" (finite 
mixture of Gaussian distributions), "hc_mclust" (model-based 
hierarchical clustering) or "random" (random cluster allocation).}

\item{Ncores}{the number of cores the user would like to use. Must be less
than or equal to the number of cores available.}

\item{autoStop}{a logical argument indicating whether the (MC)EM algorithm
    should use a stopping criterion to decide if convergence has been 
    reached. Otherwise the algorithm will run for \code{MaxIter} iterations. 

    If only continuous variables are present the algorithm will use Aitken's
    acceleration criterion with tolerance \code{stop.tol}. 

    If categorical variables are present, the stopping criterion is based
    on a moving average of the approximated log likelihood values. Let \eqn{t}
    denote the current iteration. The average of the \code{ma.band} most
    recent approximated log likelihood values is compared to the average of
    another \code{ma.band} iterations with a lag of 10 iterations.
    If this difference is less than the tolerance the algorithm will be 
    said to have converged.}

\item{ma.band}{the number of iterations to be included in the moving average
stopping criterion.}

\item{stop.tol}{the tolerance of the (MC)EM stopping criterion.}
}
\value{
An object of class \code{clustMDparallel} is returned. The output 
    components are as follows:
    \item{BICarray }{A matrix indicating the estimated BIC values for each
        of the models fitted.}
    \item{results }{A list containing the output for each of the models 
        fitted. Each entry of this list is a \code{clustMD} object. If the 
        algorithm failed to fit a particular model, the corresponding entry 
        of \code{results} will be \code{NULL}.}
}
\description{
This function allows the user to run multiple clustMD models in parallel.
The inputs are similar to \code{clustMD()} except \code{G} is now a vector
containing the numbers of components the user would like to fit and 
\code{models} is a vector of strings indicating the covariance models the 
user would like to fit for each element of G. The user can specify the 
number of cores to be used or let the function detect the number available.
}
\examples{
    data(Byar)

    # Transform skewed variables
    Byar$Size.of.primary.tumour <- sqrt(Byar$Size.of.primary.tumour)
    Byar$Serum.prostatic.acid.phosphatase <- 
    log(Byar$Serum.prostatic.acid.phosphatase)

    # Order variables (Continuous, ordinal, nominal)
    Y <- as.matrix(Byar[, c(1, 2, 5, 6, 8, 9, 10, 11, 3, 4, 12, 7)])

    # Start categorical variables at 1 rather than 0
    Y[, 9:12] <- Y[, 9:12] + 1

    # Standardise continuous variables
    Y[, 1:8] <- scale(Y[, 1:8])

    # Merge categories of EKG variable for efficiency
    Yekg <- rep(NA, nrow(Y))
    Yekg[Y[,12]==1] <- 1
    Yekg[(Y[,12]==2)|(Y[,12]==3)|(Y[,12]==4)] <- 2
    Yekg[(Y[,12]==5)|(Y[,12]==6)|(Y[,12]==7)] <- 3
    Y[, 12] <- Yekg

    \dontrun{
    res <- clustMDparallel(X = Y, G = 1:3, CnsIndx = 8, OrdIndx = 11, Nnorms = 20000,
    MaxIter = 500, models = c("EVI", "EII", "VII"), store.params = FALSE, scale = TRUE, 
    startCL = "kmeans", autoStop= TRUE, ma.band=30, stop.tol=0.0001)
  
    res$BICarray
}

}
\references{
McParland, D. and Gormley, I.C. (2016). Model based clustering 
    for mixed data: clustMD. Advances in Data Analysis and Classification, 
    10 (2):155-169.
}
\seealso{
\code{\link{clustMD}}
}

