% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/Baseline.R
\name{groupBaseline}
\alias{groupBaseline}
\title{Group BASELINe PDFs}
\usage{
groupBaseline(baseline, groupBy, nproc = 1)
}
\arguments{
\item{baseline}{\code{Baseline} object containing the \code{db} and the 
BASELINe posterior probability density functions 
(PDF) for each of the sequences, as returned by
\link{calcBaseline}.}

\item{groupBy}{The columns in the \code{db} slot of the \code{Baseline}
object by which to group the sequence PDFs.}

\item{nproc}{Number of cores to distribute the operation over. If 
\code{nproc = 0} then the \code{cluster} has already been
set and will not be reset.}
}
\value{
A \code{Baseline} object, containing the modified \code{db} and the BASELINe 
          posterior probability density functions (PDFs) for each of the groups.
}
\description{
\code{groupBaseline} convolves groups of BASELINe posterior probability density 
functions (PDFs) to get combined PDFs for each group.
}
\details{
While the selection strengths predicted by BASELINe perform well on average, 
the estimates for individual sequences can be highly variable, especially when the 
number of mutations is small. 

To overcome this, PDFs from sequences grouped by biological or experimental relevance
are convolved to form a single PDF for the selection strength. For example, sequences
from each sample may be combined together, allowing you to compare selection across
samples. This is accomplished through a fast numerical convolution technique.
}
\examples{
 
\donttest{
# Subset example data from alakazam
library(alakazam)
db <- subset(ExampleDb, ISOTYPE \%in\% c("IgA", "IgG"))
                 
# Calculate BASELINe
baseline <- calcBaseline(db, 
                         sequenceColumn="SEQUENCE_IMGT",
                         germlineColumn="GERMLINE_IMGT_D_MASK", 
                         testStatistic="focused",
                         regionDefinition=IMGT_V_NO_CDR3,
                         targetingModel=HS5FModel,
                         nproc=1)
                         
# Group PDFs by sample
grouped1 <- groupBaseline(baseline, groupBy="SAMPLE")
plotBaselineDensity(grouped1, idColumn="SAMPLE", colorElement="group", 
                    sigmaLimits=c(-1, 1))
 
# Group PDFs by both sample (between variable) and isotype (within variable)
grouped2 <- groupBaseline(baseline, groupBy=c("SAMPLE", "ISOTYPE"))
plotBaselineDensity(grouped2, idColumn="SAMPLE", groupColumn="ISOTYPE",
                    colorElement="group", colorValues=IG_COLORS,
                    sigmaLimits=c(-1, 1))

# Collapse previous isotype (within variable) grouped PDFs into sample PDFs
grouped3 <- groupBaseline(grouped2, groupBy="SAMPLE")
plotBaselineDensity(grouped3, idColumn="SAMPLE", colorElement="group",
                    sigmaLimits=c(-1, 1))
}
}
\references{
\enumerate{
  \item  Yaari G, et al. Quantifying selection in high-throughput immunoglobulin 
           sequencing data sets. 
           Nucleic Acids Res. 2012 40(17):e134.
 }
}
\seealso{
To generate the baseline object see \link{calcBaseline}.
          To calculate BASELINe statistics, such as the mean selection strength
          and the 95\% confidence interval, see \link{summarizeBaseline}.
}

