% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/bayes.r
\name{bayes}
\alias{bayes}
\title{Bayes model}
\usage{
bayes(
  y,
  M,
  X = NULL,
  R = NULL,
  model = c("BayesCpi", "BayesA", "BayesL", "BSLMM", "BayesR", "BayesB", "BayesC",
    "BayesBpi", "BayesRR"),
  map = NULL,
  Pi = NULL,
  fold = NULL,
  niter = 20000,
  nburn = 14000,
  windsize = NULL,
  wppa = 0.01,
  vg = NULL,
  dfvg = NULL,
  s2vg = NULL,
  ve = NULL,
  dfve = NULL,
  s2ve = NULL,
  lambda = 0,
  outfreq = 100,
  seed = 666666,
  threads = 4,
  verbose = TRUE
)
}
\arguments{
\item{y}{vector of phenotypes, use 'NA' for missing values. The number and order of individuals in y, M, X, and R should be exactly the same.}

\item{M}{numeric matrix of genotype with individuals in rows and markers in columns, NAs are not allowed.}

\item{X}{(optional) covariate matrix of all individuals, all values should be numeric, characters are not allowed, please use the 'model.matrix.lm' function to prepare it.}

\item{R}{(optional) environmental random effects matrix of all individuals, NAs are not allowed for the individuals with phenotypic value.}

\item{model}{bayes model including: "BayesB", "BayesA", "BayesL", "BayesRR", "BayesBpi", "BayesC", "BayesCpi", "BayesR", "BSLMM".
\itemize{
\item "BayesRR": Bayes Ridge Regression, all SNPs have non-zero effects and share the same variance, equivalent to RRBLUP or GBLUP.
\item "BayesA": all SNPs have non-zero effects, and each has its own variance, which follows an inverse chi-square distribution.
\item "BayesB": only a small proportion of SNPs (1-Pi) have non-zero effects, and each has its own variance, which follows an inverse chi-square distribution.
\item "BayesBpi": the same as "BayesB", but 'Pi' is not fixed.
\item "BayesC": only a small proportion of SNPs (1-Pi) have non-zero effects, and share the same variance.
\item "BayesCpi": the same as "BayesC", but 'Pi' is not fixed.
\item "BayesL": BayesLASSO, all SNPs have non-zero effects, and each has its own variance, which follows an exponential distribution.
\item "BSLMM": all SNPs have non-zero effects and share the same variance, but a small proportion of SNPs have an additional shared variance.
\item "BayesR": only a small proportion of SNPs have non-zero effects, and the SNPs are allocated into different groups, where the SNPs within each group share the same variance.
}}

\item{map}{(optional, only for GWAS) the map information of genotype, at least 3 columns are required: SNPs, chromosome, physical position.}

\item{Pi}{vector, the proportions of zero-effect and non-zero-effect SNPs, the first value must be the proportion of zero-effect SNPs.}

\item{fold}{proportion of variance explained for groups of SNPs, the default is c(0, 0.0001, 0.001, 0.01).}

\item{niter}{the number of MCMC iterations.}

\item{nburn}{the number of iterations to be discarded.}

\item{windsize}{window size in bp for GWAS, the default is NULL.}

\item{wppa}{the threshold of genetic variance explained by a single window, the default is 0.01.}

\item{vg}{prior value of genetic variance.}

\item{dfvg}{the number of degrees of freedom for the distribution of genetic variance.}

\item{s2vg}{scale parameter for the distribution of genetic variance.}

\item{ve}{prior value of residual variance.}

\item{dfve}{the number of degrees of freedom for the distribution of residual variance.}

\item{s2ve}{scale parameter for the distribution of residual variance.}

\item{lambda}{value of ridge regression for inverting a matrix.}

\item{outfreq}{frequency of information output on console, the default is 100.}

\item{seed}{seed for the random number generator.}

\item{threads}{number of threads used for OpenMP.}

\item{verbose}{whether to print the iteration information.}
}
\value{
the function returns a list containing
\describe{
\item{$mu}{the regression intercept}
\item{$pi}{estimated proportion of zero effect and non-zero effect SNPs}
\item{$beta}{estimated coefficients for all covariates}
\item{$r}{estimated environmental random effects}
\item{$vr}{estimated variance for all environmental random effects}
\item{$vg}{estimated genetic variance}
\item{$ve}{estimated residual variance}
\item{$alpha}{estimated effect size of all markers}
\item{$modfreq}{the frequency for markers to be included in the model during MCMC iteration, also known as posterior inclusion probability (PIP)}
\item{$g}{genomic estimated breeding value}
\item{$gwas}{WPPA is the window posterior probability of association, defined as the number of iterations in which \eqn{Pw} (the proportion of the total genetic variance explained by the window \eqn{w}) > 1\% divided by the total number of MCMC iterations; WGVE is the explained genetic variance for each window}
}
}
\description{
Bayes linear regression model using individual level data
\deqn{y = X \beta + R r + M \alpha + e}
where \eqn{\beta} is a vector of estimated coefficients for covariates, and \eqn{r} is a vector of environmental random effects. \eqn{M} is a matrix of genotype covariates, \eqn{\alpha} is a vector of estimated marker effect sizes. \eqn{e} is a vector of residuals.
}
\examples{
# Load the example data attached in the package
pheno_file_path = system.file("extdata", "pheno.txt", package = "hibayes")
pheno = read.table(pheno_file_path, header=TRUE)
bfile_path = system.file("extdata", "geno", package = "hibayes")
data = read_plink(bfile_path, out=tempfile())
fam = data$fam
geno = data$geno
map = data$map

# Adjust the order of phenotype by genotype id
geno.id = fam[, 2]
pheno = pheno[match(geno.id, pheno[, 1]), ]

# Add fixed effects, covariates, and random effect
X <- model.matrix.lm(~as.numeric(scale)+as.factor(sex), data=pheno, na.action = "na.pass")
X <- X[, -1] #remove the intercept
# then fit the model as: fit = bayes(..., X=X, R=pheno[,c("group")], ...)

# For GS/GP
fit = bayes(y=pheno[, 2], M=geno, model="BayesR", niter=200, nburn=100, outfreq=10)
\donttest{
# For GWAS
fit = bayes(y=pheno[, 2], M=geno, map=map, windsize=1e6, model="BayesCpi")
}

}
\references{
Meuwissen, Theo HE, Ben J. Hayes, and Michael E. Goddard. "Prediction of total genetic value using genome-wide dense marker maps." Genetics 157.4 (2001): 1819-1829. \cr
de los Campos, G., Hickey, J. M., Pong-Wong, R., Daetwyler, H. D., and Calus, M. P. (2013). Whole-genome regression and prediction methods applied to plant and animal breeding. Genetics, 193(2), 327-345. \cr
Habier, David, et al. "Extension of the Bayesian alphabet for genomic selection." BMC bioinformatics 12.1 (2011): 1-12. \cr
Yi, Nengjun, and Shizhong Xu. "Bayesian LASSO for quantitative trait loci mapping." Genetics 179.2 (2008): 1045-1055. \cr
Zhou, Xiang, Peter Carbonetto, and Matthew Stephens. "Polygenic modeling with Bayesian sparse linear mixed models." PLoS genetics 9.2 (2013): e1003264. \cr
Moser, Gerhard, et al. "Simultaneous discovery, estimation and prediction analysis of complex traits using a Bayesian mixture model." PLoS genetics 11.4 (2015): e1004969. \cr
}
