% Generated by roxygen2 (4.1.1): do not edit by hand
% Please edit documentation in R/misc_functions.R
\name{wkmeans}
\alias{wkmeans}
\title{Weighted k-means for mixed-type data}
\usage{
wkmeans(conData, catData, conWeight, nclust, ...)
}
\arguments{
\item{conData}{The continuous variables. Must be coercible to a data frame.}

\item{catData}{The categorical variables, either as factors or dummy-coded variables. Must be coercible to a data frame.}

\item{conWeight}{The continuous weight; must be between 0 and 1. The categorical weight is \code{1-conWeight}.}

\item{nclust}{The number of clusters.}

\item{...}{Optional arguments passed to \code{kmeans}.}
}
\value{
A \code{stats::kmeans} results object, with additional slots \code{conCenters} and \code{catCenters} giving the actual centers adjusted for the weighting process.
}
\description{
Weighted k-means for mixed continuous and categorical variables. A
user-specified weight \code{conWeight} controls the relative contribution of the
variable types to the cluster solution.
}
\details{
A simple adaptation of \code{stats::kmeans} to mixed-type data.  Continuous
variables are multiplied by the input parameter \code{conWeight}, and categorical
variables are multiplied by \code{1-conWeight}. If factor variables are input to
\code{catData}, they are transformed to 0-1 dummy coded variables with the function
\code{dummyCodeFactorDf}.
}
\examples{
# Generate a toy data set with poor quality categorical variables and good
# quality continuous variables.
set.seed(1)
dat <- genMixedData(200, nConVar=2, nCatVar=2, nCatLevels=4, nConWithErr=2,
  nCatWithErr=2, popProportions=c(.5,.5), conErrLev=0.3, catErrLev=0.8)

# apply() with factor() yields a character matrix, so ask data.frame() for
# factor columns explicitly; since R 4.0.0 strings are no longer converted
# to factors by default.
catDf <- data.frame(apply(dat$catVars, 2, factor), stringsAsFactors=TRUE)
conDf <- data.frame(scale(dat$conVars))

# A clustering that emphasizes the continuous variables
r1 <- wkmeans(conDf, catDf, 0.9, 2)
table(r1$cluster, dat$trueID)

# A clustering that emphasizes the categorical variables; note the extra
# argument (nstart) passed through to the underlying stats::kmeans function
r2 <- wkmeans(conDf, catDf, 0.1, 2, nstart=4)
table(r2$cluster, dat$trueID)
}
\seealso{
\code{\link{dummyCodeFactorDf}}

\code{\link[stats]{kmeans}}
}

