% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/group.R
\name{group}
\alias{group}
\alias{window}
\alias{binning}
\alias{split}
\title{Create groups from your data.}
\usage{
group(data, n, method = "n_dist", starts_col = NULL,
  force_equal = FALSE, allow_zero = FALSE, return_factor = FALSE,
  descending = FALSE, randomize = FALSE, col_name = ".groups",
  remove_missing_starts = FALSE)
}
\arguments{
\item{data}{Data frame or vector.}

\item{n}{\emph{Dependent on method.}

 Number of groups (default), group size, list of group sizes,
 list of group starts, step size or prime number to start at. See \code{method}.

 Passed as whole number(s) and/or percentage(s) (\code{0} < \code{n} < \code{1})
 and/or character.

 Method \code{l_starts} allows \code{'auto'}.}

\item{method}{\code{greedy}, \code{n_dist}, \code{n_fill}, \code{n_last},
 \code{n_rand}, \code{l_sizes}, \code{l_starts}, \code{staircase}, or
 \code{primes}.

 \strong{Notice}: examples are sizes of the generated groups
 based on a vector with 57 elements.

 \subsection{greedy}{Divides up the data greedily given a specified group size
 \eqn{(e.g. 10, 10, 10, 10, 10, 7)}.

 \code{n} is group size}

 \subsection{n_dist (default)}{Divides the data into a specified number of groups and
 distributes excess data points across groups
 \eqn{(e.g. 11, 11, 12, 11, 12)}.

 \code{n} is number of groups}

 \subsection{n_fill}{Divides the data into a specified number of groups and
 fills up groups with excess data points from the beginning
 \eqn{(e.g. 12, 12, 11, 11, 11)}.

 \code{n} is number of groups}

 \subsection{n_last}{Divides the data into a specified number of groups.
 It finds the most equal group sizes possible,
 using all data points. Only the last group is able to differ in size
 \eqn{(e.g. 11, 11, 11, 11, 13)}.

 \code{n} is number of groups}

 \subsection{n_rand}{Divides the data into a specified number of groups.
 Excess data points are placed randomly in groups (only 1 per group)
 \eqn{(e.g. 12, 11, 11, 11, 12)}.

 \code{n} is number of groups}

 \subsection{l_sizes}{Divides up the data by a list of group sizes.
 Excess data points are placed in an extra group at the end.
 \eqn{(e.g. n = list(0.2,0.3) outputs groups with sizes (11,17,29))}.

 \code{n} is a list of group sizes}

 \subsection{l_starts}{Starts new groups at specified values of vector.

 \code{n} is a list of starting positions.
 Skip values by \code{c(value, skip_to_number)} where \code{skip_to_number} is the nth appearance of the value
 in the vector.
 Groups automatically start from first data point.

 \eqn{E.g. n = c(1,3,7,25,50) outputs groups with sizes (2,4,18,25,8)}.

 To skip: \eqn{given vector c("a", "e", "o", "a", "e", "o"), n = list("a", "e", c("o", 2))
 outputs groups with sizes (1,4,1)}.

 If passing \eqn{n = 'auto'}, the starting positions are automatically found with
 \code{\link{find_starts}()}.}

 \subsection{staircase}{Uses step size to divide up the data.
 Group size increases with 1 step for every group,
 until there is no more data
 \eqn{(e.g. 5, 10, 15, 20, 7)}.

 \code{n} is step size}

 \subsection{primes}{Uses prime numbers as group sizes.
 Group size increases to the next prime number
 until there is no more data.
 \eqn{(e.g. 5, 7, 11, 13, 17, 4)}.

 \code{n} is the prime number to start at}}

\item{starts_col}{Name of column with values to match in method \code{l_starts}
when data is a data frame. Pass \code{'index'} to use row names. (Character)}

\item{force_equal}{Create equal groups by discarding excess data points.
Implementation varies between methods. (Logical)}

\item{allow_zero}{Whether \code{n} can be passed as \code{0}. (Logical)}

\item{return_factor}{Return only grouping factor. (Logical)}

\item{descending}{Change direction of method. (Not fully implemented)
(Logical)}

\item{randomize}{Randomize the grouping factor. (Logical)}

\item{col_name}{Name of added grouping factor. (Character)}

\item{remove_missing_starts}{Recursively remove elements from the
list of starts that are not found.
For method \code{l_starts} only.
(Logical)}
}
\value{
Data frame grouped by new grouping factor,
or only the grouping factor when \code{return_factor} is \code{TRUE}.
}
\description{
Divides data into groups by a range of methods.
 Creates a grouping factor with \code{1}s for group 1, \code{2}s for group 2, etc.
 Returns a data frame grouped by the grouping factor for easy use in
 \code{\link[magrittr]{\%>\%}} pipelines.
}
\examples{
# Attach packages
library(groupdata2)
library(dplyr)

# Create data frame
df <- data.frame("x"=c(1:12),
 "species" = rep(c('cat','pig', 'human'), 4),
 "age" = sample(c(1:100), 12))

# Using group()
df_grouped <- group(df, 5, method = 'n_dist')

# Using group() with dplyr pipeline to get mean age
df_means <- df \%>\%
 group(5, method = 'n_dist') \%>\%
 dplyr::summarise(mean_age = mean(age))

# Using group() with l_starts
# "c('pig',2)" skips to the second appearance of
# "pig" after the first appearance of "cat"
df_grouped <- group(df,
                    list('cat', c('pig',2), 'human'),
                    method = 'l_starts',
                    starts_col = 'species')

}
\seealso{
Other grouping functions: \code{\link{all_groups_identical}},
  \code{\link{fold}}, \code{\link{group_factor}},
  \code{\link{partition}}, \code{\link{splt}}

Other staircase tools: \code{\link{\%primes\%}},
  \code{\link{\%staircase\%}}, \code{\link{group_factor}}

Other l_starts tools: \code{\link{differs_from_previous}},
  \code{\link{find_missing_starts}},
  \code{\link{find_starts}}, \code{\link{group_factor}}
}
\author{
Ludvig Renbo Olsen, \email{r-pkgs@ludvigolsen.dk}
}
\concept{grouping functions}
\concept{l_starts tools}
\concept{staircase tools}
