% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/centroidFunctions.R
\name{centMode}
\alias{centMode}
\alias{centMin}
\alias{centOptimNA}
\alias{centroids}
\title{Centroid Functions for K-Centroids Clustering of (Ordinal) Categorical/Mixed Data}
\usage{
centMode(x)

centMin(x, dist, xrange = NULL)

centOptimNA(x, dist)
}
\arguments{
\item{x}{A numeric matrix or data frame.}

\item{dist}{The distance measure function used in \code{centMin} and \code{centOptimNA}.}

\item{xrange}{The range of the data in \code{x}. Currently only used for
\code{centMin}. Options are:
\itemize{
\item \code{NULL} (default): equivalent to \code{"all"}.
\item \code{"all"}: uses the same minimum and maximum value for each column
of \code{x} by determining the whole range of values in the data
object \code{x}.
\item \code{"columnwise"}: uses different minimum and maximum values for
each column of \code{x} by determining the columnwise ranges of
values in the data object \code{x}.
\item A vector of \code{c(min, max)}: specifies the same minimum and maximum
value for each column of \code{x}.
\item A list of vectors \code{list(c(min1, max1), c(min2, max2),...)} with
length \code{ncol(x)}: specifies different minimum and maximum
values for each column of \code{x}.
}}
}
\value{
A named numeric vector containing the centroid values for each column of \code{x}.
}
\description{
Functions to calculate cluster centroids for K-centroids clustering that extend the
options available in package \pkg{flexclust}.

\code{centMode} calculates centroids based on the mode of each variable.
\code{centMin} determines centroids within a specified range that
minimize the supplied distance measure. \code{centOptimNA} replicates
the behaviour of \code{\link[flexclust:distances]{flexclust::centOptim()}} but removes missing
values.

These functions are designed for use with \code{\link[flexclust:kcca]{flexclust::kcca()}} or
functions that are built upon it. Their use is easiest via the
wrapper \code{\link[=kccaExtendedFamily]{kccaExtendedFamily()}}.
}
\details{
\itemize{
\item \code{centMode}: Column-wise modes are used as centroids, and ties are
broken randomly. In combination with Simple Matching Distance (\code{distSimMatch}),
this results in the \code{kmodes} algorithm.
\item \code{centMin}: Column-wise centroids are calculated by minimizing
the specified distance measure between the values in \code{x} and all
possible levels of \code{x}.
\item \code{centOptimNA}: Column-wise centroids are calculated by
minimizing the specified distance measure via a general purpose
optimizer. Unlike in \code{\link[flexclust:distances]{flexclust::centOptim()}}, NAs are removed
from the starting search values and disregarded in the distance
calculation.
}
}
\examples{
# Fix the RNG seed so the examples are reproducible: the NA mask below is
# drawn with sample(), and centMode breaks ties between modes randomly.
set.seed(1234)

# Example: Mode as centroid
dat <- data.frame(A = rep(2:5, 2),
                  B = rep(1:4, 2),
                  C = rep(c(1, 2, 4, 5), 2))
centMode(dat)
## within kcca
flexclust::kcca(dat, 3, family = kccaExtendedFamily('kModes')) # default centroid

# Example: Centroid is the level for which the distance is minimal
centMin(dat, flexclust::distManhattan, xrange = 'all')
## within kcca: wrap centMin in a one-argument function, fixing dist and xrange
flexclust::kcca(dat, 3,
                family = flexclust::kccaFamily(dist = flexclust::distManhattan,
                                               cent = \(y) centMin(y, flexclust::distManhattan,
                                                                   xrange = 'all')))

# Example: Centroid calculated by general purpose optimizer with NA removal
## set each cell of dat to NA with probability 0.1
nas <- sample(c(TRUE, FALSE), prod(dim(dat)),
              replace = TRUE, prob = c(0.1, 0.9)) |>
       matrix(nrow = nrow(dat))
dat[nas] <- NA
centOptimNA(dat, flexclust::distManhattan)
## within kcca
flexclust::kcca(dat, 3, family = kccaExtendedFamily('kGower')) # default centroid
}
\seealso{
\code{\link[=kccaExtendedFamily]{kccaExtendedFamily()}},
\code{\link[flexclust:kcca]{flexclust::kcca()}}
}
