% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/simulate_hierarchically_sparse_data.R
\name{genHierSparseData}
\alias{genHierSparseData}
\title{Generate data with hierarchical sparsity}
\usage{
genHierSparseData(ncats, nvars, nobs, nobs.test = 100,
  hier.sparsity.param = 0.5, avg.hier.zeros = NULL,
  prop.zero.vars = 0.5, effect.size.max = 0.5,
  misspecification.prop = 0, family = c("gaussian", "binomial",
  "coxph"), sd = 1, snr = NULL, beta = NULL, tau = 10, covar = 0)
}
\arguments{
\item{ncats}{number of categories to stratify on}

\item{nvars}{number of variables}

\item{nobs}{number of observations per stratum to simulate}

\item{nobs.test}{number of independent test observations per stratum to simulate}

\item{hier.sparsity.param}{parameter between 0 and 1 which determines how much hierarchical sparsity there is. To achieve
a desired total level of sparsity among the variables with hierarchical sparsity, this parameter can be estimated using the
function \code{estimate.hier.sparsity.param}.}

\item{avg.hier.zeros}{desired proportion of zero variables among the variables with hierarchical zero patterns. If this is specified,
the given \code{hier.sparsity.param} value is ignored and the parameter is estimated instead. This estimation takes a while.}

\item{prop.zero.vars}{proportion of all variables that will be zero across all strata}

\item{effect.size.max}{maximum magnitude of the true effect sizes}

\item{misspecification.prop}{proportion of variables whose hierarchical missingness is misspecified}

\item{family}{family for the response variable}

\item{sd}{standard deviation for gaussian simulations}

\item{snr}{signal-to-noise ratio (only used for \code{family = "gaussian"})}

\item{beta}{a matrix of true beta values. If given, no beta is generated and the data are simulated from the supplied \code{beta}.}

\item{tau}{rate parameter for \code{rexp()} for generating time-to-event outcomes}

\item{covar}{scalar, pairwise covariance term for covariates}
}
\description{
function to generate data with hierarchical sparsity
}
\examples{
# fix the random seed so the simulated examples are reproducible
set.seed(123)

# simulate data for 3 categories, 100 variables and 200 observations
# per stratum, leaving all sparsity settings at their defaults
dat.sim <- genHierSparseData(ncats = 3, nvars = 100, nobs = 200)

# estimate hier.sparsity.param for 0.15 total proportion of nonzero variables
# among vars with hierarchical zero patterns
# (wrapped in dontrun because this estimation takes a while)
\dontrun{
hsp <- estimate.hier.sparsity.param(ncats = 3, nvars = 50, avg.hier.zeros = 0.15, nsims = 100)
}
# the above results in the following value
hsp <- 0.6270698

# check that this does indeed achieve the desired level of sparsity:
# average, over 50 simulated betas, the fraction of nonzero entries
mean(replicate(50, mean(genHierSparseBeta(ncats = 3, 
                       nvars = 50, hier.sparsity.param = hsp) != 0)  ))
                           
# simulate data again, now passing the sparsity parameter explicitly
dat.sim2 <- genHierSparseData(ncats = 3, nvars = 100, nobs = 200, hier.sparsity.param = hsp)

# generate the true coefficients on their own so they can be reused below
sparseBeta <- genHierSparseBeta(ncats = 3, nvars = 100, hier.sparsity.param = hsp)

## generate data with already generated beta
dat.sim3 <- genHierSparseData(ncats = 3, nvars = 100, nobs = 200, beta = sparseBeta)




## complete example:
## 50\% sparsity:
hsp <- 0.2626451

# simulate gaussian training data (150 observations per stratum) together
# with a larger independent test set (1000 observations per stratum)
dat.sim <- genHierSparseData(ncats = 3, nvars = 25,
                             nobs = 150, nobs.test = 1000,
                             hier.sparsity.param = hsp,
                             prop.zero.vars = 0.5,
                             effect.size.max = 0.25,
                             family = "gaussian")

# pull the training and test components out of the simulated data object
x        <- dat.sim$x
x.test   <- dat.sim$x.test
y        <- dat.sim$y
y.test   <- dat.sim$y.test
grp      <- dat.sim$group.ind
grp.test <- dat.sim$group.ind.test

# fit a vennLasso model on the training data, using 3 cross-validation folds
# and the adaptive lasso option
fit.adapt <- cv.vennLasso(x, y,
                          grp,
                          adaptive.lasso = TRUE,
                          nlambda        = 25,
                          family         = "gaussian",
                          abs.tol        = 1e-5,
                          rel.tol        = 1e-5,
                          maxit          = 1000,
                          irls.maxit     = 15L,
                          gamma          = 0.2,
                          standardize    = FALSE,
                          intercept      = TRUE,
                          nfolds         = 3,
                          model.matrix   = TRUE)

# predict responses for the test set from the underlying fit, evaluated at
# the lambda.min value stored on the cross-validation object
preds.a <- predict(fit.adapt$vennLasso.fit, x.test, grp.test, s = fit.adapt$lambda.min,
                   type = 'response')


}
