% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/utils.R
\name{summarize_topics}
\alias{summarize_topics}
\title{Summarize a topic model consistently across methods/functions}
\usage{
summarize_topics(theta, beta, dtm)
}
\arguments{
\item{theta}{numeric matrix whose rows represent P(topic|document)}

\item{beta}{numeric matrix whose rows represent P(token|topic)}

\item{dtm}{a document term matrix or term co-occurrence matrix of class \code{dgCMatrix}.}
}
\value{
Returns a \code{\link[tibble]{tibble}} with the following columns:
  \code{topic} is the integer row number of \code{beta}.
  \code{prevalence} is the prevalence of each topic throughout the corpus the
    model was trained on, normalized so that the values sum to 100.
  \code{coherence} is the probabilistic coherence of each topic, computed by
    \code{\link[tidylda]{calc_prob_coherence}} using the default 5
    most-probable terms in each topic.
  \code{top_terms} displays the top 3 most-probable terms in each topic.
}
\description{
Summarizes topics in a model. Called by \code{\link[tidylda]{tidylda}}
  and \code{\link[tidylda]{refit.tidylda}} and used to augment
  \code{\link[tidylda]{print.tidylda}}.
}
\note{
\code{prevalence} should be proportional to P(topic). It is calculated by
  weighting each document by its length, so topics prevalent in longer
  documents get more weight than topics prevalent in shorter documents.
  The calculation is

  \code{prevalence <- (rowSums(dtm) * theta) \%>\% colSums()}

  \code{prevalence <- (prevalence / sum(prevalence) * 100) \%>\% round(3)}

  An alternative calculation (not implemented here) might have been

  \code{prevalence <- (colSums(dtm) * t(beta)) \%>\% colSums()}

  \code{prevalence <- (prevalence / sum(prevalence) * 100) \%>\% round(3)}
}
\keyword{internal}
