% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/lda.R
\name{textmodel_lda}
\alias{textmodel_lda}
\title{Unsupervised Latent Dirichlet allocation}
\usage{
textmodel_lda(
  x,
  k = 10,
  max_iter = 2000,
  alpha = 0.5,
  beta = 0.1,
  gamma = 0,
  model = NULL,
  verbose = quanteda_options("verbose")
)
}
\arguments{
\item{x}{the dfm on which the model will be fit.}

\item{k}{the number of topics.}

\item{max_iter}{the maximum number of iterations in Gibbs sampling.}

\item{alpha}{the value to smooth the topic-document distribution.}

\item{beta}{the value to smooth the topic-word distribution.}

\item{gamma}{a parameter to determine change of topics between sentences or
paragraphs. When \code{gamma > 0}, Gibbs sampling of topics for the current
document is affected by the previous document's topics.}

\item{model}{a fitted LDA model; if provided, \code{textmodel_lda()} inherits
parameters from an existing model. See details.}

\item{verbose}{logical; if \code{TRUE} print diagnostic information during
fitting.}
}
\value{
\code{textmodel_seededlda()} and \code{textmodel_lda()} return a list of model
parameters. \code{theta} is the distribution of topics over documents; \code{phi} is
the distribution of words over topics. \code{alpha} and \code{beta} are the small
constants added to the frequency of words to estimate \code{theta} and \code{phi},
respectively, in Gibbs sampling. Other elements in the list are subject to
change.
}
\description{
Implements unsupervised Latent Dirichlet allocation (LDA). Users can run
Sequential LDA by setting \code{gamma > 0}.
}
\details{
To predict topics of new documents (i.e. out-of-sample), first,
create a new LDA model from an existing LDA model passed to \code{model} in
\code{textmodel_lda()}; second, apply \code{\link[=topics]{topics()}} to the new model. The \code{model}
argument takes objects created either by \code{textmodel_lda()} or
\code{textmodel_seededlda()}.
}
\examples{
\donttest{
require(seededlda)
require(quanteda)

corp <- head(data_corpus_moviereviews, 500)
toks <- tokens(corp, remove_punct = TRUE, remove_symbols = TRUE, remove_number = TRUE)
dfmt <- dfm(toks) \%>\%
    dfm_remove(stopwords('en'), min_nchar = 2) \%>\%
    dfm_trim(min_termfreq = 0.90, termfreq_type = "quantile",
             max_docfreq = 0.1, docfreq_type = "prop")

lda <- textmodel_lda(head(dfmt, 450), 6)
terms(lda)
topics(lda)
lda2 <- textmodel_lda(tail(dfmt, 50), model = lda) # new documents
topics(lda2)
}
}
\seealso{
\link[topicmodels:lda]{topicmodels}
}
\keyword{textmodel}
