% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/tdmPreprocUtils.r
\name{tdmPreGroupLevels}
\alias{tdmPreGroupLevels}
\title{Group the levels of the factor variable in \code{dset[,colname]}}
\usage{
tdmPreGroupLevels(dset, colname, opts)
}
\arguments{
\item{dset}{data frame}

\item{colname}{name of column to be re-grouped}

\item{opts}{list of options; this function uses the elements \itemize{
\item  PRE.Xpgroup   [0.99]
\item  PRE.MaxLevel  [32]  (32 is the maximum number of levels allowed for \code{\link[randomForest]{randomForest}})
}}
}
\value{
\code{dset}, a data frame with \code{dset[,colname]} re-grouped
}
\description{
This function reduces the number of levels for factor variables with too many levels.
It counts the cases in each level and sorts the levels by decreasing count. It then merges the least
frequent levels into a new level "OTHER" such that the remaining untouched
levels cover more than the fraction \code{opts$PRE.Xpgroup} of all cases. Alternatively, it merges the levels with
the fewest cases into "OTHER" such that the total number of new levels
is \code{opts$PRE.MaxLevel}. Of these two choices for "OTHER", the one that merges more
levels into "OTHER" is taken.
}
