% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/permute.R
\name{effective_cor}
\alias{effective_cor}
\title{Estimates the effective correlation}
\usage{
effective_cor(
  design_perm,
  sv,
  target_cor,
  calc_first = c("cor", "mean"),
  method = c("optmatch", "hungarian", "marriage"),
  iternum = 1000
)
}
\arguments{
\item{design_perm}{A numeric design matrix whose rows are to be permuted
(thus controlling the amount by which they are correlated with the
surrogate variables). The rows index the samples and the columns index
the variables. The intercept should \emph{not} be included
(though see Section "Unestimable Components" in \code{\link{thin_diff}}).}

\item{sv}{A numeric matrix of surrogate variables. The rows index the
samples and the columns index the surrogate variables.}

\item{target_cor}{A numeric matrix of target correlations between the
variables in \code{design_perm} and the surrogate variables. The
rows index the observed covariates and the columns index the surrogate
variables. That is, \code{target_cor[i, j]} specifies the target
correlation between the \code{i}th column of \code{design_perm} and the
\code{j}th surrogate variable. The surrogate variables are those
supplied in \code{sv} (for example, as estimated using factor analysis
or surrogate variable analysis).
The number of columns in \code{target_cor} specifies the number of
surrogate variables. Set \code{target_cor} to \code{NULL} to indicate
that \code{design_perm} and the surrogate variables are independent.}

\item{calc_first}{Should we first calculate the mean of the permuted
\code{design_perm} and then calculate its correlation with \code{sv}
(\code{calc_first = "mean"}), or should we first calculate the
correlations between each permuted \code{design_perm} and \code{sv} and
then calculate the mean of these correlations
(\code{calc_first = "cor"})? This should only be changed by expert users.}

\item{method}{Should we use the optimal matching technique from Hansen and
Klopfer (2006) (\code{"optmatch"}), the Gale-Shapley algorithm
for stable marriages (\code{"marriage"}) (Gale and Shapley, 1962)
as implemented in the matchingR package, or the Hungarian algorithm
(Papadimitriou and Steiglitz, 1982) (\code{"hungarian"})
as implemented in the clue package (Hornik, 2005)?
The \code{"optmatch"} method works really well
but does take a lot more computational time if you have, say, 1000
samples. If you use the \code{"optmatch"} option, you should note
that the optmatch package uses a super strange license:
\url{https://cran.r-project.org/package=optmatch/LICENSE}. If this
license doesn't work for you (because you are not in academia, or
because you don't believe in restrictive licenses), then
try out the \code{"hungarian"} method.}

\item{iternum}{The total number of simulated correlations to consider.}
}
\value{
A matrix of correlations. The rows index the observed covariates
    and the columns index the surrogate variables. Element (i, j) is
    the estimated correlation between the ith variable in
    \code{design_perm} and the jth variable in \code{sv}.
}
\description{
Returns the estimated (effective) correlation between the design matrix
and the surrogate variables that results when you assign a target
correlation. The method is described in detail in Gerard (2020).
}
\details{
This function permutes the rows of \code{design_perm} many times, each
time calculating the Pearson correlation between the columns of
\code{design_perm} and the columns of \code{sv}. It then returns the
averages of these Pearson correlations. The permutation is done
using \code{\link{permute_design}}.
}
\examples{
## Generate the design matrices and set target correlation -----------------
n <- 10
design_perm <- cbind(rep(c(0, 1), each = n / 2),
                     rep(c(0, 1), length.out = n))
sv <- matrix(rnorm(n))
target_cor <- matrix(c(0.9, 0.1), ncol = 1)

## Get estimated true correlation ------------------------------------------
## You should use a much larger iternum in practice
effective_cor(design_perm = design_perm,
              sv = sv,
              target_cor = target_cor,
              iternum = 10)

}
\references{
\itemize{
  \item{Gale, David, and Lloyd S. Shapley. "College admissions and the stability of marriage." The American Mathematical Monthly 69, no. 1 (1962): 9-15.}
  \item{Gerard, D (2020). "Data-based RNA-seq simulations by binomial thinning." \emph{BMC Bioinformatics}. 21(1), 206. doi: \href{https://doi.org/10.1186/s12859-020-3450-9}{10.1186/s12859-020-3450-9}.}
  \item{Hansen, Ben B., and Stephanie Olsen Klopfer. "Optimal full matching and related designs via network flows." Journal of Computational and Graphical Statistics 15, no. 3 (2006): 609-627.}
  \item{Hornik K (2005). "A CLUE for CLUster Ensembles." Journal of Statistical Software, 14(12). doi: \href{https://doi.org/10.18637/jss.v014.i12}{10.18637/jss.v014.i12}.}
  \item{C. Papadimitriou and K. Steiglitz (1982), Combinatorial Optimization: Algorithms and Complexity. Englewood Cliffs: Prentice Hall.}
}
}
\author{
David Gerard
}
