% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/NBDev.R
\name{NBDev}
\alias{NBDev}
\title{Fit a negative binomial GLM for a given design matrix}
\usage{
NBDev(counts, design, log.offset, nb.disp, print.progress = TRUE,
  bias.fold.tolerance = 1.1, chip.col.indicator)
}
\arguments{
\item{counts}{A matrix of integers obtained by counting reads across a 
genomic partition. Each row contains observations from a single window of
the genomic partition. Each column contains observations from a single
sample (either ChIP or control/input).}

\item{design}{A design matrix for the full model, including a column that 
indicates whether the observation is a ChIP sample (1) or control/input (0).
The number of rows must be \code{ncol(counts)}. Means are modeled with a log
link function.}

\item{log.offset}{A vector of log-scale, additive factors used to adjust 
estimated log-scale means for differences in library sizes across samples. 
Commonly used offsets include \code{log.offset=log(colSums(counts))} and 
\code{log.offset=log(apply(counts[rowSums(counts)!=0,],2,quantile,.75))}. If
\code{NULL}, the later offset is used.}

\item{nb.disp}{Estimated negative binomial dispersion parameters obtained from
either \code{\link{estimateGLMTrendedDisp}} or \code{\link{estimateGLMCommonDisp}} in 
package \code{\link{edgeR}}.  These estimates are treated as known and are
used to compute deviances.}

\item{print.progress}{logical. If TRUE, the function will provide an update on
what window (row number) is being analyzed. Updates occur frequently to 
start then eventually occur every 5000 windows.}

\item{bias.fold.tolerance}{A numerical value no smaller than 1. If the bias 
reduction of maximum likelihood estimates of (log) fold change is likely to 
result in a ratio of fold changes greater than this value, then bias 
reduction will be performed on such windows. Setting 
\code{bias.fold.tolerance=Inf} will completely disable bias reduction; 
setting \code{bias.fold.tolerance=1} will always perform bias reduction (see
details). If the constrained estimate differs from the unconstrained 
estimate, bias reduction is not performed.}

\item{chip.col.indicator}{A binary vector of length \code{ncol(design.matrix)}
that indicates which column of the full design matrix corresponds to the 
ChIP indicator.}
}
\value{
A list containing: 
 \item{dev}{Vector containing the deviance for each
 window under a negative binomial model fit to design matrix specified by
 \code{design}. This vector is passed along within the \code{\link{QL.fit}}
 function.} 
 \item{means}{Matrix of fitted mean values for each window.} 
 \item{parms}{Matrix of estimated coefficients for each window. Note that
 these are given on the log scale. (i.e., intercept coefficient reports
 log(average count) and non-intercept coefficients report estimated (and bias
 reduced, when appropriate) log fold-changes.)} 
 \item{dev.constrained}{Same as \code{dev}, subject to the
 constraint that the ChIP coefficient is non-negative. If fitting a reduced
 model with no ChIP coefficient, this will be \code{NA}.} 
 \item{means.constrained}{Same as \code{means}, subject to
 the constraint that the ChIP coefficient is non-negative. If fitting a
 reduced model with no ChIP coefficient, this will be \code{NA}.} 
 \item{parms.constrained}{Same as \code{parms},
 subject to the constraint that the ChIP coefficient is non-negative. If
 fitting a reduced model with no ChIP coefficient, this will be \code{NA}.}
}
\description{
A function called within \code{\link{QL.fit}} to fit a negative
 binomial GLM to each window for a given design matrix.
}
\details{
This functions fits, for each row of \code{counts}, a negative 
 binomial log-linear model through the GLM framework with the over-dispersion
 parameter fixed.
 
 Asymptotic biases of regression coefficients (i.e., log fold changes) are 
 then estimated by a plug-in estimate [eqn. (15.4) of McCullagh and Nelder, 
 1989] from the last iteration of iteratively reweighted least squares (IWLS)
 procedure. The fitted response values are then compared with or without such
 a bias term. If the ratio of fitted response values are larger than 
 \code{bias.fold.tolerance} for any observation and the unconstrained
 estimate equals the constrained estimate, then the bias-reduction (not 
 bias-correction) procedure according to Firth (1993) and Kosmidis & Firth 
 (2009) is applied to such rows of \code{counts}, by adjusting the score 
 equation with a term based on the observed information. Such bias-reduced
 estimates are more advantageous than directly subtracting the estimated bias
 from the maximum likelihood estimates as the latter may not exist (e.g.,
 when all counts in the control/input group are zero).
}
\references{
Firth (1993) "Bias reduction of maximum likelihood estimates"
\emph{Biometrika}, \bold{80}, 27--38.

Kosmidis and Firth (2009) "Bias reduction in exponential family nonlinear
models" \emph{Biometrika}, \bold{96}, 793--804.

Lund, Nettleton, McCarthy and Smyth (2012) "Detecting differential expression
in RNA-sequence data using quasi-likelihood with shrunken dispersion
estimates" emph{SAGMB}, \bold{11}(5).

McCullagh and Nelder (1989) "Generalized Linear Models", 2nd edition. London:
Chapman and Hall.
}
\author{
Emily Goren (\email{emily.goren@gmail.com}) based on modifications of
 code by Steve Lund and Long Qu.
}
