% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/weight.R
\name{weight}
\alias{weight}
\alias{weight.DNAbin}
\alias{weight.AAbin}
\alias{weight.list}
\alias{weight.dendrogram}
\alias{weight.default}
\title{Sequence weighting}
\usage{
weight(x, ...)

\method{weight}{DNAbin}(x, method = "Henikoff", k = 5, ...)

\method{weight}{AAbin}(x, method = "Henikoff", k = 5, ...)

\method{weight}{list}(x, method = "Henikoff", k = 5, residues = NULL, gap = "-", ...)

\method{weight}{dendrogram}(x, method = "Gerstein", ...)

\method{weight}{default}(x, method = "Henikoff", k = 5, residues = NULL, gap = "-", ...)
}
\arguments{
\item{x}{a list or matrix of sequences
(usually a "DNAbin" or "AAbin" object).
Alternatively, \code{x} can be an object of class \code{"dendrogram"}
for tree-based weighting.}

\item{...}{additional arguments to be passed between methods.}

\item{method}{a character string indicating the weighting method to be used.
Currently the only methods available are a modified version of the
maximum entropy weighting scheme proposed by
Henikoff and Henikoff (1994) (\code{method = "Henikoff"})
and the tree-based weighting scheme of Gerstein et al. (1994)
(\code{method = "Gerstein"}).}

\item{k}{integer representing the k-mer size to be used.
Defaults to 5. Note that higher
values of k may be slow to compute and use excessive memory due to
the large numbers of calculations required.}

\item{residues}{either NULL (default; emitted residues are automatically
detected from the sequences), a case sensitive character vector
specifying the residue alphabet, or one of the character strings
"RNA", "DNA", "AA", "AMINO". Note that the default option can be slow for
large lists of character vectors. Furthermore, the default setting
\code{residues = NULL} will not detect rare residues that are not present
in the sequences, and thus will not assign them emission probabilities
in the model. Specifying the residue alphabet is therefore
recommended unless x is a "DNAbin" or "AAbin" object.}

\item{gap}{the character used to represent gaps in the alignment matrix
(if applicable). Ignored for \code{"DNAbin"} or \code{"AAbin"} objects.
Defaults to "-" otherwise.}
}
\value{
a named vector of weights, the sum of which is equal to
   the total number of sequences (average weight = 1).
}
\description{
Weighting schemes for DNA and amino acid sequences.
}
\details{
This is a generic function.
  If \code{method = "Henikoff"} the sequences are weighted
  using a modified version of the maximum entropy method proposed by
  Henikoff and Henikoff (1994). In this case the
  maximum entropy weights are calculated from a k-mer presence/absence
  matrix instead of an alignment as originally described by
  Henikoff and Henikoff (1994).
  If \code{method = "Gerstein"} the agglomerative method of
  Gerstein et al. (1994) is used to weight sequences based
  on their relatedness as derived from a phylogenetic tree.
  In this case a dendrogram is first derived using the
  \code{\link[kmer]{cluster}} function in the
  \code{\link[kmer]{kmer}} package.
  Methods are available for
  \code{"dendrogram"} objects, \code{"DNAbin"} and \code{"AAbin"}
  sequence objects (as lists or matrices) and sequences in standard
  character format provided either as lists or matrices.

  For further details on sequence weighting schemes see Durbin et al.
  (1998) chapter 5.8.
}
\examples{
  ## weight the sequences in the woodmouse dataset from the ape package
  library(ape)
  data(woodmouse)
  woodmouse.weights <- weight(woodmouse)
  woodmouse.weights
}
\references{
Durbin R, Eddy SR, Krogh A, Mitchison G (1998) Biological
  sequence analysis: probabilistic models of proteins and nucleic acids.
  Cambridge University Press, Cambridge, United Kingdom.

  Gerstein M, Sonnhammer ELL, Chothia C (1994) Volume changes in protein evolution.
  \emph{Journal of Molecular Biology}, \strong{236}, 1067-1078.

  Henikoff S, Henikoff JG (1994) Position-based sequence weights.
  \emph{Journal of Molecular Biology}, \strong{243}, 574-578.
}
\author{
Shaun Wilkinson
}
